diff --git a/.github/workflows/build_and_test.yaml b/.github/workflows/build_and_test.yaml
new file mode 100644
index 00000000..339634d1
--- /dev/null
+++ b/.github/workflows/build_and_test.yaml
@@ -0,0 +1,81 @@
+name: build and test
+
+on:
+ push:
+ branches:
+ - develop
+ - "tests/**"
+ paths-ignore:
+ - "docs/**"
+ - "README.md"
+ pull_request:
+ branches:
+ - develop
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ build_and_test_cpu:
+ runs-on: ${{ matrix.os }}
+ strategy:
+ matrix:
+ os: [ubuntu-latest, windows-latest]
+ python-version: ["3.7", "3.8"]
+ include:
+ - python-version: "3.7"
+ os: windows-latest
+ gdal-whl-url: https://download.lfd.uci.edu/pythonlibs/archived/cp37/GDAL-3.3.3-cp37-cp37m-win_amd64.whl
+ - python-version: "3.7"
+ os: ubuntu-latest
+ gdal-whl-url: https://versaweb.dl.sourceforge.net/project/gdal-wheels-for-linux/GDAL-3.4.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl
+ - python-version: "3.8"
+ os: windows-latest
+ gdal-whl-url: https://download.lfd.uci.edu/pythonlibs/archived/GDAL-3.3.3-cp38-cp38-win_amd64.whl
+ - python-version: "3.8"
+ os: ubuntu-latest
+ gdal-whl-url: https://versaweb.dl.sourceforge.net/project/gdal-wheels-for-linux/GDAL-3.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl
+ fail-fast: false
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Python ${{ matrix.python-version }}
+ uses: actions/setup-python@v4
+ with:
+ python-version: ${{ matrix.python-version }}
+ - name: Upgrade pip
+ run: python -m pip install pip --upgrade --user
+ - name: Install PaddlePaddle
+ run: python -m pip install paddlepaddle==2.3.1 -i https://mirror.baidu.com/pypi/simple
+ - name: Install PaddleRS
+ run: |
+ python -m pip install -r requirements.txt
+ python -m pip install -e .
+ - name: Install GDAL
+ run: python -m pip install ${{ matrix.gdal-whl-url }}
+ - name: Run unittests
+ run: |
+ cd tests
+ bash run_fast_tests.sh
+ shell: bash
+
+ build_and_test_cuda102:
+ runs-on: ubuntu-18.04
+ container:
+ image: registry.baidubce.com/paddlepaddle/paddle:2.3.1-gpu-cuda10.2-cudnn7
+ steps:
+ - uses: actions/checkout@v3
+ - name: Upgrade pip
+ run: python3.7 -m pip install pip --upgrade --user
+ - name: Install PaddleRS
+ run: |
+ python3.7 -m pip install -r requirements.txt
+ python3.7 -m pip install -e .
+ - name: Install GDAL
+ run: python3.7 -m pip install https://versaweb.dl.sourceforge.net/project/gdal-wheels-for-linux/GDAL-3.4.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl
+ # Do not run unit tests, because there is no GPU on the machine.
+ # - name: Run unittests
+ # run: |
+ # cd tests
+ # bash run_fast_tests.sh
+ # shell: bash
\ No newline at end of file
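
Since GDAL is installed from prebuilt wheels rather than built from source, a small smoke test could be appended as a further CI step to confirm the bindings actually load; a minimal sketch (not part of the patch):

```python
# Post-install smoke test for the GDAL wheel; `osgeo` is the standard import
# path for GDAL's Python bindings.
from osgeo import gdal

# Report the installed release, e.g. "3.4.1" for the Linux wheel above.
print(gdal.VersionInfo("RELEASE_NAME"))

# By GDAL convention, opening a missing file returns None instead of raising.
assert gdal.Open("does_not_exist.tif") is None
```
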
diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml
new file mode 100644
index 00000000..939add7f
--- /dev/null
+++ b/.github/workflows/lint.yaml
@@ -0,0 +1,25 @@
+name: lint
+
+on: [push, pull_request]
+
+concurrency:
+ group: ${{ github.workflow }}-${{ github.ref }}
+ cancel-in-progress: true
+
+jobs:
+ lint:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+ - name: Set up Python 3.7
+ uses: actions/setup-python@v4
+ with:
+ python-version: 3.7
+ - name: Upgrade pip
+ run: python -m pip install pip --upgrade --user
+ - name: Install pre-commit hooks
+ run: |
+ pip install pre-commit
+ pre-commit install
+ - name: Lint
+ run: pre-commit run --all-files
\ No newline at end of file
diff --git a/README.md b/README.md
index 3626156d..193217a6 100644
--- a/README.md
+++ b/README.md
@@ -6,9 +6,9 @@
**A high-performance remote sensing image processing development kit built on the PaddlePaddle framework, covering the full remote sensing deep learning workflow end to end, from training to deployment.**
-
-
- [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
+
+ [![license](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE)
+ [![build status](https://github.com/PaddleCV-SIG/PaddleRS/actions/workflows/build_and_test.yaml/badge.svg?branch=develop)](https://github.com/PaddleCV-SIG/PaddleRS/actions)
![python version](https://img.shields.io/badge/python-3.7+-orange.svg)
![support os](https://img.shields.io/badge/os-linux%2C%20win%2C%20mac-yellow.svg)
@@ -92,7 +92,7 @@ PaddleRS是遥感科研院所、相关高校共同基于飞桨开发的遥感处
ResizeByShort
RandomResizeByShort
ResizeByLong
- RandomFlipOrRotation
+ RandomFlipOrRotate
RandomHorizontalFlip
RandomVerticalFlip
Normalize
@@ -100,13 +100,13 @@ PaddleRS是遥感科研院所、相关高校共同基于飞桨开发的遥感处
RandomCrop
RandomScaleAspect
RandomExpand
- Padding
+ Pad
MixupImage
RandomDistort
RandomBlur
- Defogging
- DimReducing
- BandSelecting
+ Dehaze
+ ReduceDim
+ SelectBand
RandomSwap
@@ -223,4 +223,3 @@ PaddleRS是遥感科研院所、相关高校共同基于飞桨开发的遥感处
year={2022}
}
```
-
diff --git a/deploy/export/README.md b/deploy/export/README.md
index ea03a4b1..d3e2eb7a 100644
--- a/deploy/export/README.md
+++ b/deploy/export/README.md
@@ -60,4 +60,3 @@ python deploy/export_model.py --model_dir=./output/deeplabv3p/best_model/ --save
- For YOLO/PPYOLO-series detection models, make sure the input image's `w` and `h` take the same value and are both multiples of 32; when `--fixed_input_shape` is specified, `w` and `h` of R-CNN models must also be multiples of 32.
- When specifying `[w,h]`, separate `w` and `h` with a half-width comma (`,`); spaces or other characters are not allowed between them.
- The larger `w` and `h` are, the more time and memory/GPU memory the model consumes at inference time; however, if `w` and `h` are too small, the model's accuracy may be severely degraded.
-- For the change detection model BIT, make sure `--fixed_input_shape` is specified with no negative values: BIT uses spatial attention and needs to read the `b,c,h,w` attributes from a tensor, which raises an error if any value is negative.
diff --git a/deploy/export/export_model.py b/deploy/export/export_model.py
index 0fc3c9d1..b43c7fc2 100644
--- a/deploy/export/export_model.py
+++ b/deploy/export/export_model.py
@@ -21,9 +21,23 @@
def get_parser():
parser = argparse.ArgumentParser()
- parser.add_argument('--model_dir', '-m', type=str, default=None, help='model directory path')
- parser.add_argument('--save_dir', '-s', type=str, default=None, help='path to save inference model')
- parser.add_argument('--fixed_input_shape', '-fs', type=str, default=None,
+ parser.add_argument(
+ '--model_dir',
+ '-m',
+ type=str,
+ default=None,
+ help='model directory path')
+ parser.add_argument(
+ '--save_dir',
+ '-s',
+ type=str,
+ default=None,
+ help='path to save inference model')
+ parser.add_argument(
+ '--fixed_input_shape',
+ '-fs',
+ type=str,
+ default=None,
help="export inference model with fixed input shape: [w,h] or [n,c,w,h]")
return parser
@@ -39,13 +53,17 @@ def get_parser():
fixed_input_shape = literal_eval(args.fixed_input_shape)
# Check validity
if not isinstance(fixed_input_shape, list):
- raise ValueError("fixed_input_shape should be of None or list type.")
+ raise ValueError(
+ "fixed_input_shape should be of None or list type.")
if len(fixed_input_shape) not in (2, 4):
- raise ValueError("fixed_input_shape contains an incorrect number of elements.")
+ raise ValueError(
+ "fixed_input_shape contains an incorrect number of elements.")
if fixed_input_shape[-1] <= 0 or fixed_input_shape[-2] <= 0:
- raise ValueError("the input width and height must be positive integers.")
- if len(fixed_input_shape)==4 and fixed_input_shape[1] <= 0:
- raise ValueError("the number of input channels must be a positive integer.")
+ raise ValueError(
+ "Input width and height must be positive integers.")
+ if len(fixed_input_shape) == 4 and fixed_input_shape[1] <= 0:
+ raise ValueError(
+ "The number of input channels must be a positive integer.")
# Set environment variables
os.environ['PADDLEX_EXPORT_STAGE'] = 'True'
@@ -56,4 +74,4 @@ def get_parser():
# Do dynamic-to-static cast
# XXX: Invoke a protected (single underscore) method outside of subclasses.
- model._export_inference_model(args.save_dir, fixed_input_shape)
\ No newline at end of file
+ model._export_inference_model(args.save_dir, fixed_input_shape)
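
For reference, the validation logic reformatted above accepts `[w,h]` or `[n,c,w,h]` strings parsed with `ast.literal_eval`; a self-contained sketch of the same checks:

```python
# Standalone sketch of the --fixed_input_shape validation above, using the same
# "[w,h]" / "[n,c,w,h]" string convention.
from ast import literal_eval

def parse_fixed_input_shape(s):
    shape = literal_eval(s)  # e.g. "[256,256]" -> [256, 256]
    if not isinstance(shape, list):
        raise ValueError("fixed_input_shape should be None or a list.")
    if len(shape) not in (2, 4):
        raise ValueError("fixed_input_shape contains an incorrect number of elements.")
    if shape[-1] <= 0 or shape[-2] <= 0:
        raise ValueError("Input width and height must be positive integers.")
    if len(shape) == 4 and shape[1] <= 0:
        raise ValueError("The number of input channels must be a positive integer.")
    return shape

print(parse_fixed_input_shape("[256,256]"))      # [256, 256]
print(parse_fixed_input_shape("[1,3,256,256]"))  # [1, 3, 256, 256]
```
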
diff --git a/docs/README.md b/docs/README.md
index 2479f0fd..f2cd104d 100644
--- a/docs/README.md
+++ b/docs/README.md
@@ -1,5 +1,3 @@
-PaddleSeg commit fec42fd869b6f796c74cd510671595e3512bc8e9
-
# Development Guidelines
Note that for the paddlers/models/ppxxx series, do not add, delete, or modify any code other than changing import paths and adding multi-channel support.
-New models should be placed under the seg, det, cls, and cd directories in paddlers/models/.
\ No newline at end of file
+New models should be placed under the seg, det, cls, and cd directories in paddlers/models/.
diff --git a/docs/apis/model_zoo.md b/docs/apis/model_zoo.md
index 45570b21..d7c16932 100644
--- a/docs/apis/model_zoo.md
+++ b/docs/apis/model_zoo.md
@@ -4,18 +4,18 @@ PaddleRS的基础模型库来自[PaddleClas](https://github.com/PaddlePaddle/Pad
## Custom Model Zoo
-| Model | Purpose |
-| --------------- | -------- |
+| Model | Purpose |
+| --------------- | -------- |
| FarSeg | Semantic segmentation |
| BIT | Change detection |
| CDNet | Change detection |
| DSIFN | Change detection |
| STANet | Change detection |
-| SNUNet | Change detection |
+| SNUNet | Change detection |
| DSAMNet | Change detection |
-| FCEarlyFusion | Change detection |
-| FCSiamConc | Change detection |
-| FCSiamDiff | Change detection |
+| FCEarlyFusion | Change detection |
+| FCSiamConc | Change detection |
+| FCSiamDiff | Change detection |
## How to Import
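
The section body is not part of this hunk, but given the package layout added elsewhere in this patch (`paddlers/custom_models/cd/bit.py` defines `class BIT`), a direct import presumably looks like this hypothetical sketch:

```python
# Hypothetical import sketch; the module path follows this patch's file layout,
# but the documented public entry point may differ (e.g. via paddlers.tasks).
from paddlers.custom_models.cd.bit import BIT

print(BIT)  # <class 'paddlers.custom_models.cd.bit.BIT'>
```
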
diff --git a/docs/apis/transforms.md b/docs/apis/transforms.md
index f05e9d26..9721ec7d 100644
--- a/docs/apis/transforms.md
+++ b/docs/apis/transforms.md
@@ -1,6 +1,6 @@
# Data Augmentation
-PaddleRS integrates the data augmentation operators needed by a variety of tasks, all of which are used through `Compose`. For data loading, `ImgDecoder` can read not only three-channel RGB images but also SAR and other multi-band images, with an option to convert them to `uint8`. In addition, the following data augmentation methods are provided.
+PaddleRS integrates the data augmentation operators needed by a variety of tasks, all of which are used through `Compose`. For data loading, `DecodeImg` can read not only three-channel RGB images but also SAR and other multi-band images, with an option to convert them to `uint8`. In addition, the following data augmentation methods are provided.
| Data augmentation | Purpose | Task | ... |
| -------------------- | ----------------------------------------------- | -------- | ---- |
@@ -16,13 +16,13 @@ PaddleRS将多种任务需要的数据增强进行了有机整合,均通过`Co
| RandomCrop | Randomly center-crop the input | All | ... |
| RandomScaleAspect | Crop the input and resize it back to its original size | All | ... |
| RandomExpand | Randomly expand the input by padding based on a random offset | All | ... |
-| Padding | Pad the input to a specified size | All | ... |
+| Pad | Pad the input to a specified size | All | ... |
| MixupImage | Mix two images together with their `gt_bbbox/gt_score` | Object detection | ... |
| RandomDistort | Apply a random color transformation to the input | All | ... |
| RandomBlur | Randomly blur the input | All | ... |
-| Defogging | Dehaze the input image | All | ... |
-| DimReducing | Reduce the dimensionality of the input image | All | ... |
-| BandSelecting | Select bands of the input image | All | ... |
+| Dehaze | Dehaze the input image | All | ... |
+| ReduceDim | Reduce the dimensionality of the input image | All | ... |
+| SelectBand | Select bands of the input image | All | ... |
| RandomSwap | Randomly swap the two input images | Change detection | ... |
| ... | ... | | ... |
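
With the renamed operators, a preprocessing pipeline built on `Compose` might look like the following sketch; the constructor parameters shown are assumptions for illustration, not verified signatures:

```python
# Illustrative pipeline with the renamed operators; parameter values are assumptions.
import paddlers.transforms as T

train_transforms = T.Compose([
    T.DecodeImg(),           # reads RGB as well as SAR / multi-band imagery
    T.RandomFlipOrRotate(),  # renamed from RandomFlipOrRotation
    T.Normalize(
        mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
])
```
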
diff --git a/docs/data/coco_tools_cn.md b/docs/data/coco_tools_cn.md
index faf2ee10..362c4272 100644
--- a/docs/data/coco_tools_cn.md
+++ b/docs/data/coco_tools_cn.md
@@ -85,7 +85,7 @@ Args_show = True
------------------------------------------------Info------------------------------------------------
json read...
-json keys: dict_keys(['info', 'licenses', 'images', 'annotations', 'categories'])
+json keys: dict_keys(['info', 'licenses', 'images', 'annotations', 'categories'])
***********************info***********************
Content Type: dict
@@ -409,7 +409,7 @@ Args_show = True
------------------------------------------------Info------------------------------------------------
json read...
-json keys: dict_keys(['images', 'categories'])
+json keys: dict_keys(['images', 'categories'])
**********************images**********************
Content Type: list
diff --git a/docs/data/dataset_summary.md b/docs/data/dataset_summary.md
index 0cdd2783..f36ca613 100644
--- a/docs/data/dataset_summary.md
+++ b/docs/data/dataset_summary.md
@@ -215,4 +215,4 @@
| [9-5](https://aistudio.baidu.com/aistudio/datasetdetail/136567) | [WHU TCL SatMVS 1.0](http://gpcv.whu.edu.cn/data/whu_tlc.html) | Image generation | 5120 * 5120 | 1 | 300 | __ | tif, jpg | __ | 2.1m, 2.5m | __ | Satellite imagery | ZY3 | 2021 | Wuhan University | http://gpcv.whu.edu.cn/data/whu_tlc.html | https://aistudio.baidu.com/aistudio/datasetdetail/136567 |
| [9-6](https://aistudio.baidu.com/aistudio/datasetdetail/136567) | [WHU TCL SatMVS 2.0](http://gpcv.whu.edu.cn/data/whu_tlc.html) | Image generation | 768 * 384 | 1 | 5011 | __ | tif | __ | 2.1m, 2.5m | __ | Satellite imagery | ZY3 | 2021 | Wuhan University | http://gpcv.whu.edu.cn/data/whu_tlc.html | https://aistudio.baidu.com/aistudio/datasetdetail/136567 |
| 9-7 | [DLR-ACD](https://www.dlr.de/eoc/en/desktopdefault.aspx/tabid-12760/22294_read-58354/) | Image generation | 3619 * 5226 | 3 | 33 | 1 | __ | __ | 0.045~ 0.15m | __ | Aerial imagery | Aerial imagery | 2019 | German Aerospace Center | https://www.dlr.de/eoc/en/desktopdefault.aspx/tabid-12760/22294_read-58354/ | |
-| 9-8 | [SEN12MS-CR](https://mediatum.ub.tum.de/1554803) | Image generation | 256 * 256 | 13, 2 | 122218 | __ | __ | __ | __ | __ | Satellite imagery | Sentinel1, Sentinel2 | 2020 | TUM | https://mediatum.ub.tum.de/1554803 | |
\ No newline at end of file
+| 9-8 | [SEN12MS-CR](https://mediatum.ub.tum.de/1554803) | Image generation | 256 * 256 | 13, 2 | 122218 | __ | __ | __ | __ | __ | Satellite imagery | Sentinel1, Sentinel2 | 2020 | TUM | https://mediatum.ub.tum.de/1554803 | |
diff --git a/docs/quick_start.md b/docs/quick_start.md
index 8e8b6579..ff9556b8 100644
--- a/docs/quick_start.md
+++ b/docs/quick_start.md
@@ -39,4 +39,4 @@ python -m paddle.distributed.launch --gpus 0,1 tutorials/train/semantic_segmenta
visualdl --logdir output/deeplabv3p_resnet50_multi_channel/vdl_log --port 8001
```
-After the service starts, open https://0.0.0.0:8001 or https://localhost:8001 in a browser
\ No newline at end of file
+After the service starts, open https://0.0.0.0:8001 or https://localhost:8001 in a browser
diff --git a/paddlers/__init__.py b/paddlers/__init__.py
index 3e97e1ff..3b71c112 100644
--- a/paddlers/__init__.py
+++ b/paddlers/__init__.py
@@ -21,4 +21,4 @@
log_level = 2
-from . import tasks, datasets, transforms, utils, tools, models, deploy
\ No newline at end of file
+from . import tasks, datasets, transforms, utils, tools, models, deploy
diff --git a/paddlers/custom_models/__init__.py b/paddlers/custom_models/__init__.py
index ea0abdf8..cbfbb1ff 100644
--- a/paddlers/custom_models/__init__.py
+++ b/paddlers/custom_models/__init__.py
@@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from . import cls, det, seg, gan, cd
\ No newline at end of file
+from . import cls, det, seg, gan, cd
diff --git a/paddlers/custom_models/cd/backbones/__init__.py b/paddlers/custom_models/cd/backbones/__init__.py
index eeae9aa1..29c8b7d1 100644
--- a/paddlers/custom_models/cd/backbones/__init__.py
+++ b/paddlers/custom_models/cd/backbones/__init__.py
@@ -10,4 +10,4 @@
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
-# limitations under the License.
\ No newline at end of file
+# limitations under the License.
diff --git a/paddlers/custom_models/cd/bit.py b/paddlers/custom_models/cd/bit.py
index af9f2f90..0b38fbe0 100644
--- a/paddlers/custom_models/cd/bit.py
+++ b/paddlers/custom_models/cd/bit.py
@@ -22,6 +22,15 @@
from .param_init import KaimingInitMixin
+def calc_product(*args):
+ if len(args) < 1:
+ raise ValueError("calc_product expects at least one positional argument.")
+ ret = args[0]
+ for arg in args[1:]:
+ ret *= arg
+ return ret
+
+
class BIT(nn.Layer):
"""
The BIT implementation based on PaddlePaddle.
@@ -131,9 +140,10 @@ def __init__(self,
def _get_semantic_tokens(self, x):
b, c = x.shape[:2]
att_map = self.conv_att(x)
- att_map = att_map.reshape((b, self.token_len, 1, -1))
+ att_map = att_map.reshape(
+ (b, self.token_len, 1, calc_product(*att_map.shape[2:])))
att_map = F.softmax(att_map, axis=-1)
- x = x.reshape((b, 1, c, -1))
+ x = x.reshape((b, 1, c, att_map.shape[-1]))
tokens = (x * att_map).sum(-1)
return tokens
@@ -172,7 +182,7 @@ def forward(self, t1, t2):
else:
token1 = self._get_reshaped_tokens(x1)
token2 = self._get_reshaped_tokens(x2)
-
+
# Transformer encoder forward
token = paddle.concat([token1, token2], axis=1)
token = self.encode(token)
@@ -253,6 +263,7 @@ def __init__(self,
inner_dim = head_dim * n_heads
self.n_heads = n_heads
+ self.head_dim = head_dim
self.scale = dim**-0.5
self.apply_softmax = apply_softmax
@@ -272,9 +283,10 @@ def forward(self, x, ref):
k = self.fc_k(ref)
v = self.fc_v(ref)
- q = q.reshape((b, n, h, -1)).transpose((0, 2, 1, 3))
- k = k.reshape((b, paddle.shape(ref)[1], h, -1)).transpose((0, 2, 1, 3))
- v = v.reshape((b, paddle.shape(ref)[1], h, -1)).transpose((0, 2, 1, 3))
+ q = q.reshape((b, n, h, self.head_dim)).transpose((0, 2, 1, 3))
+ rn = ref.shape[1]
+ k = k.reshape((b, rn, h, self.head_dim)).transpose((0, 2, 1, 3))
+ v = v.reshape((b, rn, h, self.head_dim)).transpose((0, 2, 1, 3))
mult = paddle.matmul(q, k, transpose_y=True) * self.scale
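
The reshape changes above replace `-1` wildcards with explicitly computed dimensions: `calc_product` multiplies the trailing dims so exported static graphs see concrete shapes, which is why the `--fixed_input_shape` caveat for BIT could be dropped from deploy/export/README.md. A standalone sketch:

```python
# calc_product as added above: an explicit product of trailing dimensions
# replaces -1 in reshape, so exported static graphs carry concrete shapes.
def calc_product(*args):
    if len(args) < 1:
        raise ValueError("calc_product expects at least one positional argument.")
    ret = args[0]
    for arg in args[1:]:
        ret *= arg
    return ret

b, token_len, h, w = 2, 4, 16, 16
# (b, token_len, 1, h*w) instead of (b, token_len, 1, -1):
print((b, token_len, 1, calc_product(h, w)))  # (2, 4, 1, 256)
```
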
diff --git a/paddlers/custom_models/cd/fc_ef.py b/paddlers/custom_models/cd/fc_ef.py
index a0086882..a831485b 100644
--- a/paddlers/custom_models/cd/fc_ef.py
+++ b/paddlers/custom_models/cd/fc_ef.py
@@ -131,8 +131,7 @@ def forward(self, t1, t2):
# Stage 4d
x4d = self.upconv4(x4p)
- pad4 = (0, paddle.shape(x43)[3] - paddle.shape(x4d)[3], 0,
- paddle.shape(x43)[2] - paddle.shape(x4d)[2])
+ pad4 = (0, x43.shape[3] - x4d.shape[3], 0, x43.shape[2] - x4d.shape[2])
x4d = paddle.concat([F.pad(x4d, pad=pad4, mode='replicate'), x43], 1)
x43d = self.do43d(self.conv43d(x4d))
x42d = self.do42d(self.conv42d(x43d))
@@ -140,8 +139,7 @@ def forward(self, t1, t2):
# Stage 3d
x3d = self.upconv3(x41d)
- pad3 = (0, paddle.shape(x33)[3] - paddle.shape(x3d)[3], 0,
- paddle.shape(x33)[2] - paddle.shape(x3d)[2])
+ pad3 = (0, x33.shape[3] - x3d.shape[3], 0, x33.shape[2] - x3d.shape[2])
x3d = paddle.concat([F.pad(x3d, pad=pad3, mode='replicate'), x33], 1)
x33d = self.do33d(self.conv33d(x3d))
x32d = self.do32d(self.conv32d(x33d))
@@ -149,16 +147,14 @@ def forward(self, t1, t2):
# Stage 2d
x2d = self.upconv2(x31d)
- pad2 = (0, paddle.shape(x22)[3] - paddle.shape(x2d)[3], 0,
- paddle.shape(x22)[2] - paddle.shape(x2d)[2])
+ pad2 = (0, x22.shape[3] - x2d.shape[3], 0, x22.shape[2] - x2d.shape[2])
x2d = paddle.concat([F.pad(x2d, pad=pad2, mode='replicate'), x22], 1)
x22d = self.do22d(self.conv22d(x2d))
x21d = self.do21d(self.conv21d(x22d))
# Stage 1d
x1d = self.upconv1(x21d)
- pad1 = (0, paddle.shape(x12)[3] - paddle.shape(x1d)[3], 0,
- paddle.shape(x12)[2] - paddle.shape(x1d)[2])
+ pad1 = (0, x12.shape[3] - x1d.shape[3], 0, x12.shape[2] - x1d.shape[2])
x1d = paddle.concat([F.pad(x1d, pad=pad1, mode='replicate'), x12], 1)
x12d = self.do12d(self.conv12d(x1d))
x11d = self.conv11d(x12d)
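
The simplification above reads sizes via `Tensor.shape` instead of `paddle.shape()` and pads the upsampled decoder feature on the right/bottom to match its skip connection before concatenation; a minimal sketch of the geometry (shapes illustrative):

```python
# The decoder output can be one pixel smaller than the skip connection after
# upsampling odd-sized inputs, so it is padded before concatenation.
import paddle
import paddle.nn.functional as F

x_s = paddle.rand([1, 8, 25, 25])  # skip connection (odd spatial size)
x_d = paddle.rand([1, 8, 24, 24])  # upsampled decoder feature

# (pad_left, pad_right, pad_top, pad_bottom) over the last two dims
pad = (0, x_s.shape[3] - x_d.shape[3], 0, x_s.shape[2] - x_d.shape[2])
x_d = F.pad(x_d, pad=pad, mode='replicate')
print(paddle.concat([x_d, x_s], axis=1).shape)  # [1, 16, 25, 25]
```
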
diff --git a/paddlers/custom_models/cd/fc_siam_conc.py b/paddlers/custom_models/cd/fc_siam_conc.py
index af705433..bbe2632f 100644
--- a/paddlers/custom_models/cd/fc_siam_conc.py
+++ b/paddlers/custom_models/cd/fc_siam_conc.py
@@ -154,8 +154,8 @@ def forward(self, t1, t2):
# Decode
# Stage 4d
x4d = self.upconv4(x4p)
- pad4 = (0, paddle.shape(x43_1)[3] - paddle.shape(x4d)[3], 0,
- paddle.shape(x43_1)[2] - paddle.shape(x4d)[2])
+ pad4 = (0, x43_1.shape[3] - x4d.shape[3], 0,
+ x43_1.shape[2] - x4d.shape[2])
x4d = paddle.concat(
[F.pad(x4d, pad=pad4, mode='replicate'), x43_1, x43_2], 1)
x43d = self.do43d(self.conv43d(x4d))
@@ -164,8 +164,8 @@ def forward(self, t1, t2):
# Stage 3d
x3d = self.upconv3(x41d)
- pad3 = (0, paddle.shape(x33_1)[3] - paddle.shape(x3d)[3], 0,
- paddle.shape(x33_1)[2] - paddle.shape(x3d)[2])
+ pad3 = (0, x33_1.shape[3] - x3d.shape[3], 0,
+ x33_1.shape[2] - x3d.shape[2])
x3d = paddle.concat(
[F.pad(x3d, pad=pad3, mode='replicate'), x33_1, x33_2], 1)
x33d = self.do33d(self.conv33d(x3d))
@@ -174,8 +174,8 @@ def forward(self, t1, t2):
# Stage 2d
x2d = self.upconv2(x31d)
- pad2 = (0, paddle.shape(x22_1)[3] - paddle.shape(x2d)[3], 0,
- paddle.shape(x22_1)[2] - paddle.shape(x2d)[2])
+ pad2 = (0, x22_1.shape[3] - x2d.shape[3], 0,
+ x22_1.shape[2] - x2d.shape[2])
x2d = paddle.concat(
[F.pad(x2d, pad=pad2, mode='replicate'), x22_1, x22_2], 1)
x22d = self.do22d(self.conv22d(x2d))
@@ -183,8 +183,8 @@ def forward(self, t1, t2):
# Stage 1d
x1d = self.upconv1(x21d)
- pad1 = (0, paddle.shape(x12_1)[3] - paddle.shape(x1d)[3], 0,
- paddle.shape(x12_1)[2] - paddle.shape(x1d)[2])
+ pad1 = (0, x12_1.shape[3] - x1d.shape[3], 0,
+ x12_1.shape[2] - x1d.shape[2])
x1d = paddle.concat(
[F.pad(x1d, pad=pad1, mode='replicate'), x12_1, x12_2], 1)
x12d = self.do12d(self.conv12d(x1d))
diff --git a/paddlers/custom_models/cd/fc_siam_diff.py b/paddlers/custom_models/cd/fc_siam_diff.py
index 9343cfed..b60b5dbf 100644
--- a/paddlers/custom_models/cd/fc_siam_diff.py
+++ b/paddlers/custom_models/cd/fc_siam_diff.py
@@ -154,8 +154,8 @@ def forward(self, t1, t2):
# Decode
# Stage 4d
x4d = self.upconv4(x4p)
- pad4 = (0, paddle.shape(x43_1)[3] - paddle.shape(x4d)[3], 0,
- paddle.shape(x43_1)[2] - paddle.shape(x4d)[2])
+ pad4 = (0, x43_1.shape[3] - x4d.shape[3], 0,
+ x43_1.shape[2] - x4d.shape[2])
x4d = F.pad(x4d, pad=pad4, mode='replicate')
x4d = paddle.concat([x4d, paddle.abs(x43_1 - x43_2)], 1)
x43d = self.do43d(self.conv43d(x4d))
@@ -164,8 +164,8 @@ def forward(self, t1, t2):
# Stage 3d
x3d = self.upconv3(x41d)
- pad3 = (0, paddle.shape(x33_1)[3] - paddle.shape(x3d)[3], 0,
- paddle.shape(x33_1)[2] - paddle.shape(x3d)[2])
+ pad3 = (0, x33_1.shape[3] - x3d.shape[3], 0,
+ x33_1.shape[2] - x3d.shape[2])
x3d = F.pad(x3d, pad=pad3, mode='replicate')
x3d = paddle.concat([x3d, paddle.abs(x33_1 - x33_2)], 1)
x33d = self.do33d(self.conv33d(x3d))
@@ -174,8 +174,8 @@ def forward(self, t1, t2):
# Stage 2d
x2d = self.upconv2(x31d)
- pad2 = (0, paddle.shape(x22_1)[3] - paddle.shape(x2d)[3], 0,
- paddle.shape(x22_1)[2] - paddle.shape(x2d)[2])
+ pad2 = (0, x22_1.shape[3] - x2d.shape[3], 0,
+ x22_1.shape[2] - x2d.shape[2])
x2d = F.pad(x2d, pad=pad2, mode='replicate')
x2d = paddle.concat([x2d, paddle.abs(x22_1 - x22_2)], 1)
x22d = self.do22d(self.conv22d(x2d))
@@ -183,8 +183,8 @@ def forward(self, t1, t2):
# Stage 1d
x1d = self.upconv1(x21d)
- pad1 = (0, paddle.shape(x12_1)[3] - paddle.shape(x1d)[3], 0,
- paddle.shape(x12_1)[2] - paddle.shape(x1d)[2])
+ pad1 = (0, x12_1.shape[3] - x1d.shape[3], 0,
+ x12_1.shape[2] - x1d.shape[2])
x1d = F.pad(x1d, pad=pad1, mode='replicate')
x1d = paddle.concat([x1d, paddle.abs(x12_1 - x12_2)], 1)
x12d = self.do12d(self.conv12d(x1d))
diff --git a/paddlers/custom_models/cd/layers/__init__.py b/paddlers/custom_models/cd/layers/__init__.py
index ed9d985f..9314c566 100644
--- a/paddlers/custom_models/cd/layers/__init__.py
+++ b/paddlers/custom_models/cd/layers/__init__.py
@@ -13,4 +13,4 @@
# limitations under the License.
from .blocks import *
-from .attention import ChannelAttention, SpatialAttention, CBAM
\ No newline at end of file
+from .attention import ChannelAttention, SpatialAttention, CBAM
diff --git a/paddlers/custom_models/cd/layers/blocks.py b/paddlers/custom_models/cd/layers/blocks.py
index c379d665..2661900f 100644
--- a/paddlers/custom_models/cd/layers/blocks.py
+++ b/paddlers/custom_models/cd/layers/blocks.py
@@ -140,12 +140,14 @@ def __init__(self,
class MaxPool2x2(nn.MaxPool2D):
def __init__(self, **kwargs):
- super(MaxPool2x2, self).__init__(kernel_size=2, stride=(2, 2), padding=(0, 0), **kwargs)
+ super(MaxPool2x2, self).__init__(
+ kernel_size=2, stride=(2, 2), padding=(0, 0), **kwargs)
class MaxUnPool2x2(nn.MaxUnPool2D):
def __init__(self, **kwargs):
- super(MaxUnPool2x2, self).__init__(kernel_size=2, stride=(2, 2), padding=(0, 0), **kwargs)
+ super(MaxUnPool2x2, self).__init__(
+ kernel_size=2, stride=(2, 2), padding=(0, 0), **kwargs)
class ConvTransposed3x3(nn.Layer):
diff --git a/paddlers/custom_models/cd/snunet.py b/paddlers/custom_models/cd/snunet.py
index c73af29b..161a9a08 100644
--- a/paddlers/custom_models/cd/snunet.py
+++ b/paddlers/custom_models/cd/snunet.py
@@ -132,7 +132,7 @@ def forward(self, t1, t2):
out = paddle.concat([x0_1, x0_2, x0_3, x0_4], 1)
- intra = paddle.sum(paddle.stack([x0_1, x0_2, x0_3, x0_4]), axis=0)
+ intra = x0_1 + x0_2 + x0_3 + x0_4
m_intra = self.ca_intra(intra)
out = self.ca_inter(out) * (out + paddle.tile(m_intra, (1, 4, 1, 1)))
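
The replacement is numerically equivalent: summing a stacked tensor along axis 0 is the same as adding the four feature maps elementwise, without materializing the intermediate stack. A quick check:

```python
# Equivalence of stack-then-sum and direct elementwise addition.
import paddle

xs = [paddle.rand([1, 3, 8, 8]) for _ in range(4)]
a = paddle.sum(paddle.stack(xs), axis=0)
b = xs[0] + xs[1] + xs[2] + xs[3]
print(paddle.allclose(a, b).item())  # True
```
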
diff --git a/paddlers/custom_models/cls/condensenet_v2.py b/paddlers/custom_models/cls/condensenet_v2.py
index fe057d63..2ca10739 100644
--- a/paddlers/custom_models/cls/condensenet_v2.py
+++ b/paddlers/custom_models/cls/condensenet_v2.py
@@ -39,7 +39,7 @@ def forward(self, x):
b, c, _, _ = x.shape
y = self.avg_pool(x).reshape((b, c))
y = self.fc(y).reshape((b, c, 1, 1))
- return x * y.expand_as(x)
+ return x * paddle.expand(y, shape=x.shape)
class HS(nn.Layer):
@@ -92,7 +92,7 @@ def ShuffleLayer(x, groups):
# transpose
x = x.transpose((0, 2, 1, 3, 4))
# reshape
- x = x.reshape((batchsize, -1, height, width))
+ x = x.reshape((batchsize, groups * channels_per_group, height, width))
return x
@@ -104,7 +104,7 @@ def ShuffleLayerTrans(x, groups):
# transpose
x = x.transpose((0, 2, 1, 3, 4))
# reshape
- x = x.reshape((batchsize, -1, height, width))
+ x = x.reshape((batchsize, channels_per_group * groups, height, width))
return x
@@ -374,7 +374,8 @@ def add_block(self, i, group_1x1, group_3x3, group_trans, bottleneck,
def forward(self, x):
features = self.features(x)
- out = features.reshape((features.shape[0], -1))
+ out = features.reshape((features.shape[0], features.shape[1] *
+ features.shape[2] * features.shape[3]))
out = self.fc(out)
out = self.fc_act(out)
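
The same `-1` elimination is applied to the channel shuffle; the sketch below walks the reshape-transpose-reshape round trip with explicit dimensions (values illustrative):

```python
# Channel shuffle as in ShuffleLayer: split channels into groups, transpose,
# then flatten back with an explicit channel count rather than -1.
import paddle

x = paddle.arange(6).reshape([1, 6, 1, 1])  # channels 0..5
groups, channels_per_group = 2, 3
y = x.reshape([1, groups, channels_per_group, 1, 1])
y = y.transpose([0, 2, 1, 3, 4])
y = y.reshape([1, groups * channels_per_group, 1, 1])  # explicit, not -1
print(y.flatten().tolist())  # [0, 3, 1, 4, 2, 5]: channels interleaved across groups
```
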
diff --git a/paddlers/custom_models/gan/generators/rcan.py b/paddlers/custom_models/gan/generators/rcan.py
index e838dc2d..9de30c71 100644
--- a/paddlers/custom_models/gan/generators/rcan.py
+++ b/paddlers/custom_models/gan/generators/rcan.py
@@ -8,14 +8,15 @@
def default_conv(in_channels, out_channels, kernel_size, bias=True):
- weight_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.XavierUniform(),
- need_clip =True)
- return nn.Conv2D(in_channels,
- out_channels,
- kernel_size,
- padding=(kernel_size // 2),
- weight_attr=weight_attr,
- bias_attr=bias)
+ weight_attr = paddle.ParamAttr(
+ initializer=paddle.nn.initializer.XavierUniform(), need_clip=True)
+ return nn.Conv2D(
+ in_channels,
+ out_channels,
+ kernel_size,
+ padding=(kernel_size // 2),
+ weight_attr=weight_attr,
+ bias_attr=bias)
class MeanShift(nn.Conv2D):
diff --git a/paddlers/custom_models/gan/rcan_model.py b/paddlers/custom_models/gan/rcan_model.py
index 0676e756..781f46e1 100644
--- a/paddlers/custom_models/gan/rcan_model.py
+++ b/paddlers/custom_models/gan/rcan_model.py
@@ -27,6 +27,7 @@
class RCANModel(BaseModel):
"""Base SR model for single image super-resolution.
"""
+
def __init__(self, generator, pixel_criterion=None, use_init_weight=False):
"""
Args:
@@ -69,16 +70,14 @@ def train_iter(self, optims=None):
loss_pixel.backward()
optims['optim'].step()
else:
- print('Skip this batch {}! (Loss: {})'.format(
- self.batch + 1, loss_pixel.item()
- ))
+ print('Skip this batch {}! (Loss: {})'.format(self.batch + 1,
+ loss_pixel.item()))
self.batch += 1
if self.batch % 1000 == 0:
- self.error_last = loss_pixel.item()/1000
+ self.error_last = loss_pixel.item() / 1000
print("update error_last:{}".format(self.error_last))
-
def test_iter(self, metrics=None):
self.nets['generator'].eval()
with paddle.no_grad():
@@ -99,8 +98,8 @@ def test_iter(self, metrics=None):
def init_sr_weight(net):
def reset_func(m):
- if hasattr(m, 'weight') and (not isinstance(
- m, (nn.BatchNorm, nn.BatchNorm2D))):
+ if hasattr(m, 'weight') and (
+ not isinstance(m, (nn.BatchNorm, nn.BatchNorm2D))):
reset_parameters(m)
net.apply(reset_func)
diff --git a/paddlers/custom_models/seg/farseg.py b/paddlers/custom_models/seg/farseg.py
index 2e0161f8..ce487459 100644
--- a/paddlers/custom_models/seg/farseg.py
+++ b/paddlers/custom_models/seg/farseg.py
@@ -32,7 +32,7 @@ class FPN(nn.Layer):
"""
Module that adds FPN on top of a list of feature maps.
The feature maps are currently supposed to be in increasing depth
- order, and must be consecutive
+ order, and must be consecutive
"""
def __init__(self,
@@ -41,38 +41,35 @@ def __init__(self,
conv_block=ConvReLU,
top_blocks=None):
super(FPN, self).__init__()
- self.inner_blocks = []
- self.layer_blocks = []
+
+ inner_blocks = []
+ layer_blocks = []
for idx, in_channels in enumerate(in_channels_list, 1):
- inner_block = "fpn_inner{}".format(idx)
- layer_block = "fpn_layer{}".format(idx)
if in_channels == 0:
continue
inner_block_module = conv_block(in_channels, out_channels, 1)
layer_block_module = conv_block(out_channels, out_channels, 3, 1)
- self.add_sublayer(inner_block, inner_block_module)
- self.add_sublayer(layer_block, layer_block_module)
for module in [inner_block_module, layer_block_module]:
for m in module.sublayers():
if isinstance(m, nn.Conv2D):
kaiming_normal_init(m.weight)
- self.inner_blocks.append(inner_block)
- self.layer_blocks.append(layer_block)
+ inner_blocks.append(inner_block_module)
+ layer_blocks.append(layer_block_module)
+ self.inner_blocks = nn.LayerList(inner_blocks)
+ self.layer_blocks = nn.LayerList(layer_blocks)
self.top_blocks = top_blocks
def forward(self, x):
- last_inner = getattr(self, self.inner_blocks[-1])(x[-1])
- results = [getattr(self, self.layer_blocks[-1])(last_inner)]
- for feature, inner_block, layer_block in zip(
- x[:-1][::-1], self.inner_blocks[:-1][::-1],
- self.layer_blocks[:-1][::-1]):
- if not inner_block:
- continue
+ last_inner = self.inner_blocks[-1](x[-1])
+ results = [self.layer_blocks[-1](last_inner)]
+ for i, feature in enumerate(x[-2::-1]):
+ inner_block = self.inner_blocks[len(self.inner_blocks) - 2 - i]
+ layer_block = self.layer_blocks[len(self.layer_blocks) - 2 - i]
inner_top_down = F.interpolate(
last_inner, scale_factor=2, mode="nearest")
- inner_lateral = getattr(self, inner_block)(feature)
+ inner_lateral = inner_block(feature)
last_inner = inner_lateral + inner_top_down
- results.insert(0, getattr(self, layer_block)(last_inner))
+ results.insert(0, layer_block(last_inner))
if isinstance(self.top_blocks, LastLevelP6P7):
last_results = self.top_blocks(x[-1], results[-1])
results.extend(last_results)
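
The FPN rewrite swaps string-keyed `add_sublayer`/`getattr` bookkeeping for `nn.LayerList`, which registers the sublayers' parameters with the parent and allows direct indexing. A minimal sketch of why registration matters:

```python
# Sublayers kept in a plain Python list are invisible to .parameters()
# (and hence to state_dict and optimizers); nn.LayerList registers them.
import paddle.nn as nn

class Bad(nn.Layer):
    def __init__(self):
        super().__init__()
        self.blocks = [nn.Conv2D(3, 3, 1)]                # not registered

class Good(nn.Layer):
    def __init__(self):
        super().__init__()
        self.blocks = nn.LayerList([nn.Conv2D(3, 3, 1)])  # registered, indexable

print(len(list(Bad().parameters())), len(list(Good().parameters())))  # 0 2
```
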
diff --git a/paddlers/datasets/__init__.py b/paddlers/datasets/__init__.py
index 145fe304..a8aeca07 100644
--- a/paddlers/datasets/__init__.py
+++ b/paddlers/datasets/__init__.py
@@ -17,4 +17,4 @@
from .seg_dataset import SegDataset
from .cd_dataset import CDDataset
from .clas_dataset import ClasDataset
-from .sr_dataset import SRdataset, ComposeTrans
\ No newline at end of file
+from .sr_dataset import SRdataset, ComposeTrans
diff --git a/paddlers/datasets/cd_dataset.py b/paddlers/datasets/cd_dataset.py
index 4f11d06b..c0576ad0 100644
--- a/paddlers/datasets/cd_dataset.py
+++ b/paddlers/datasets/cd_dataset.py
@@ -17,6 +17,7 @@
import os.path as osp
from paddle.io import Dataset
+
from paddlers.utils import logging, get_num_workers, get_encoding, path_normalization, is_pic
diff --git a/paddlers/datasets/clas_dataset.py b/paddlers/datasets/clas_dataset.py
index bf21ed40..172513dd 100644
--- a/paddlers/datasets/clas_dataset.py
+++ b/paddlers/datasets/clas_dataset.py
@@ -16,6 +16,7 @@
import copy
from paddle.io import Dataset
+
from paddlers.utils import logging, get_num_workers, get_encoding, path_normalization, is_pic
diff --git a/paddlers/datasets/coco.py b/paddlers/datasets/coco.py
index 667c01bb..b4fc845f 100644
--- a/paddlers/datasets/coco.py
+++ b/paddlers/datasets/coco.py
@@ -23,7 +23,7 @@
from paddle.io import Dataset
from paddlers.utils import logging, get_num_workers, get_encoding, path_normalization, is_pic
-from paddlers.transforms import ImgDecoder, MixupImage
+from paddlers.transforms import DecodeImg, MixupImage
from paddlers.tools import YOLOAnchorCluster
@@ -256,8 +256,8 @@ def __getitem__(self, idx):
if self.data_fields is not None:
sample_mix = {k: sample_mix[k] for k in self.data_fields}
sample = self.mixup_op(sample=[
- ImgDecoder(to_rgb=False)(sample),
- ImgDecoder(to_rgb=False)(sample_mix)
+ DecodeImg(to_rgb=False)(sample),
+ DecodeImg(to_rgb=False)(sample_mix)
])
sample = self.transforms(sample)
return sample
diff --git a/paddlers/datasets/voc.py b/paddlers/datasets/voc.py
index 2ce2f36c..1876910e 100644
--- a/paddlers/datasets/voc.py
+++ b/paddlers/datasets/voc.py
@@ -25,7 +25,7 @@
from paddle.io import Dataset
from paddlers.utils import logging, get_num_workers, get_encoding, path_normalization, is_pic
-from paddlers.transforms import ImgDecoder, MixupImage
+from paddlers.transforms import DecodeImg, MixupImage
from paddlers.tools import YOLOAnchorCluster
@@ -320,8 +320,8 @@ def __getitem__(self, idx):
if self.data_fields is not None:
sample_mix = {k: sample_mix[k] for k in self.data_fields}
sample = self.mixup_op(sample=[
- ImgDecoder(to_rgb=False)(sample),
- ImgDecoder(to_rgb=False)(sample_mix)
+ DecodeImg(to_rgb=False)(sample),
+ DecodeImg(to_rgb=False)(sample_mix)
])
sample = self.transforms(sample)
return sample
diff --git a/paddlers/deploy/predictor.py b/paddlers/deploy/predictor.py
index 96fdb975..2bc3d382 100644
--- a/paddlers/deploy/predictor.py
+++ b/paddlers/deploy/predictor.py
@@ -175,9 +175,9 @@ def postprocess(self, net_outputs, topk=1, ori_shape=None, transforms=None):
if self._model._postprocess is None:
self._model.build_postprocess_from_labels(topk)
# XXX: Convert ndarray to tensor as self._model._postprocess requires
- net_outputs = paddle.to_tensor(net_outputs)
- assert net_outputs.shape[1] == 1
- outputs = self._model._postprocess(net_outputs.squeeze(1))
+ assert len(net_outputs) == 1
+ net_outputs = paddle.to_tensor(net_outputs[0])
+ outputs = self._model._postprocess(net_outputs)
class_ids = map(itemgetter('class_ids'), outputs)
scores = map(itemgetter('scores'), outputs)
label_names = map(itemgetter('label_names'), outputs)
@@ -252,22 +252,26 @@ def predict(self,
transforms=None,
warmup_iters=0,
repeats=1):
- """ 图片预测
+ """
+ Do prediction.
+
Args:
- img_file(List[str or tuple or np.ndarray], str, tuple, or np.ndarray):
- 对于场景分类、图像复原、目标检测和语义分割任务来说,该参数可为单一图像路径,或是解码后的、排列格式为(H, W, C)
- 且具有float32类型的BGR图像(表示为numpy的ndarray形式),或者是一组图像路径或np.ndarray对象构成的列表;对于变化检测
- 任务来说,该参数可以为图像路径二元组(分别表示前后两个时相影像路径),或是两幅图像组成的二元组,或者是上述两种二元组
- 之一构成的列表。
- topk(int): 场景分类模型预测时使用,表示预测前topk的结果。默认值为1。
- transforms (paddlers.transforms): 数据预处理操作。默认值为None, 即使用`model.yml`中保存的数据预处理操作。
- warmup_iters (int): 预热轮数,用于评估模型推理以及前后处理速度。若大于1,会预先重复预测warmup_iters,而后才开始正式的预测及其速度评估。默认为0。
- repeats (int): 重复次数,用于评估模型推理以及前后处理速度。若大于1,会预测repeats次取时间平均值。默认值为1。
+ img_file(list[str | tuple | np.ndarray] | str | tuple | np.ndarray): For scene classification, image restoration,
+ object detection, and semantic segmentation tasks, `img_file` should be either the path of the image to predict,
+ a decoded image (an `np.ndarray`, which should be consistent with what you get from passing an image path to
+ `paddlers.transforms.decode_image()`), or a list of image paths or decoded images. For change detection tasks,
+ `img_file` should be a tuple of image paths, a tuple of decoded images, or a list of tuples.
+ topk(int, optional): Top-k values to reserve in a classification result. Defaults to 1.
+ transforms (paddlers.transforms.Compose | None, optional): Pipeline of data preprocessing. If None, load transforms
+ from `model.yml`. Defaults to None.
+ warmup_iters (int, optional): Warm-up iterations before measuring the execution time. Defaults to 0.
+ repeats (int, optional): Number of repetitions to evaluate model inference and data processing speed. If greater than
+ 1, the reported time consumption is the average of all repeats. Defaults to 1.
"""
if repeats < 1:
logging.error("`repeats` must be no less than 1.", exit=True)
if transforms is None and not hasattr(self._model, 'test_transforms'):
- raise Exception("Transforms need to be defined, now is None.")
+ raise ValueError("Transforms need to be defined, now is None.")
if transforms is None:
transforms = self._model.test_transforms
if isinstance(img_file, tuple) and len(img_file) != 2:
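
Based on the translated docstring, typical calls presumably look like the sketch below; the `Predictor` constructor argument and all file paths are placeholders, not verified against the full source:

```python
# Hypothetical usage sketch for the predict() API documented above; the model
# directory and image paths are placeholders.
from paddlers.deploy import Predictor

predictor = Predictor("./inference_model")
# Single-image tasks: a path, a decoded np.ndarray, or a list of either.
result = predictor.predict("demo.tif", warmup_iters=5, repeats=10)
# Change detection: a (t1, t2) tuple of paths or decoded images.
cd_result = predictor.predict(("t1.tif", "t2.tif"))
```
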
diff --git a/paddlers/models/__init__.py b/paddlers/models/__init__.py
index 9ddd0975..345e589c 100644
--- a/paddlers/models/__init__.py
+++ b/paddlers/models/__init__.py
@@ -12,4 +12,4 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-from . import ppcls, ppdet, ppseg, ppgan
\ No newline at end of file
+from . import ppcls, ppdet, ppseg, ppgan
diff --git a/paddlers/models/ppcls/__init__.py b/paddlers/models/ppcls/__init__.py
index a906591d..2ba76e9b 100644
--- a/paddlers/models/ppcls/__init__.py
+++ b/paddlers/models/ppcls/__init__.py
@@ -20,4 +20,4 @@
from .arch import *
from .optimizer import *
from .data import *
-from .utils import *
\ No newline at end of file
+from .utils import *
diff --git a/paddlers/models/ppcls/arch/backbone/model_zoo/xception.py b/paddlers/models/ppcls/arch/backbone/model_zoo/xception.py
index 2b843788..f7d0931b 100644
--- a/paddlers/models/ppcls/arch/backbone/model_zoo/xception.py
+++ b/paddlers/models/ppcls/arch/backbone/model_zoo/xception.py
@@ -201,22 +201,14 @@ def __init__(self, block_num=8):
super(MiddleFlow, self).__init__()
self.block_num = block_num
- self._conv_0 = MiddleFlowBottleneckBlock(
- 728, 728, name="middle_flow_0")
- self._conv_1 = MiddleFlowBottleneckBlock(
- 728, 728, name="middle_flow_1")
- self._conv_2 = MiddleFlowBottleneckBlock(
- 728, 728, name="middle_flow_2")
- self._conv_3 = MiddleFlowBottleneckBlock(
- 728, 728, name="middle_flow_3")
- self._conv_4 = MiddleFlowBottleneckBlock(
- 728, 728, name="middle_flow_4")
- self._conv_5 = MiddleFlowBottleneckBlock(
- 728, 728, name="middle_flow_5")
- self._conv_6 = MiddleFlowBottleneckBlock(
- 728, 728, name="middle_flow_6")
- self._conv_7 = MiddleFlowBottleneckBlock(
- 728, 728, name="middle_flow_7")
+ self._conv_0 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_0")
+ self._conv_1 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_1")
+ self._conv_2 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_2")
+ self._conv_3 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_3")
+ self._conv_4 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_4")
+ self._conv_5 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_5")
+ self._conv_6 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_6")
+ self._conv_7 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_7")
if block_num == 16:
self._conv_8 = MiddleFlowBottleneckBlock(
728, 728, name="middle_flow_8")
@@ -297,8 +289,7 @@ def __init__(self, class_num):
name = "exit_flow"
- self._conv_0 = ExitFlowBottleneckBlock(
- 728, 728, 1024, name=name + "_1")
+ self._conv_0 = ExitFlowBottleneckBlock(728, 728, 1024, name=name + "_1")
self._conv_1 = SeparableConv(1024, 1536, stride=1, name=name + "_2")
self._conv_2 = SeparableConv(1536, 2048, stride=1, name=name + "_3")
self._pool = AdaptiveAvgPool2D(1)
@@ -362,16 +353,14 @@ def Xception41(pretrained=False, use_ssld=False, **kwargs):
def Xception65(pretrained=False, use_ssld=False, **kwargs):
- model = Xception(
- entry_flow_block_num=3, middle_flow_block_num=16, **kwargs)
+ model = Xception(entry_flow_block_num=3, middle_flow_block_num=16, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["Xception65"], use_ssld=use_ssld)
return model
def Xception71(pretrained=False, use_ssld=False, **kwargs):
- model = Xception(
- entry_flow_block_num=5, middle_flow_block_num=16, **kwargs)
+ model = Xception(entry_flow_block_num=5, middle_flow_block_num=16, **kwargs)
_load_pretrained(
pretrained, model, MODEL_URLS["Xception71"], use_ssld=use_ssld)
return model
diff --git a/paddlers/models/ppcls/data/utils/__init__.py b/paddlers/models/ppcls/data/utils/__init__.py
index 61d5aa21..185a92b8 100644
--- a/paddlers/models/ppcls/data/utils/__init__.py
+++ b/paddlers/models/ppcls/data/utils/__init__.py
@@ -10,4 +10,4 @@
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
-# limitations under the License.
\ No newline at end of file
+# limitations under the License.
diff --git a/paddlers/models/ppcls/loss/deephashloss.py b/paddlers/models/ppcls/loss/deephashloss.py
index c9a58dc7..0fecb632 100644
--- a/paddlers/models/ppcls/loss/deephashloss.py
+++ b/paddlers/models/ppcls/loss/deephashloss.py
@@ -15,6 +15,7 @@
import paddle
import paddle.nn as nn
+
class DSHSDLoss(nn.Layer):
"""
# DSHSD(IEEE ACCESS 2019)
@@ -23,6 +24,7 @@ class DSHSDLoss(nn.Layer):
# [DSHSD] epoch:250, bit:48, dataset:nuswide_21, MAP:0.809, Best MAP: 0.815
# [DSHSD] epoch:135, bit:48, dataset:imagenet, MAP:0.647, Best MAP: 0.647
"""
+
def __init__(self, alpha, multi_label=False):
super(DSHSDLoss, self).__init__()
self.alpha = alpha
@@ -65,6 +67,7 @@ class LCDSHLoss(nn.Layer):
# [LCDSH] epoch:145, bit:48, dataset:cifar10-1, MAP:0.798, Best MAP: 0.798
# [LCDSH] epoch:183, bit:48, dataset:nuswide_21, MAP:0.833, Best MAP: 0.834
"""
+
def __init__(self, n_class, _lambda):
super(LCDSHLoss, self).__init__()
self._lambda = _lambda
@@ -75,9 +78,11 @@ def forward(self, input, label):
# label to one-hot
label = paddle.flatten(label)
- label = paddle.nn.functional.one_hot(label, self.n_class).astype("float32")
-
- s = 2 * (paddle.matmul(label, label, transpose_y=True) > 0).astype("float32") - 1
+ label = paddle.nn.functional.one_hot(label,
+ self.n_class).astype("float32")
+
+ s = 2 * (paddle.matmul(
+ label, label, transpose_y=True) > 0).astype("float32") - 1
inner_product = paddle.matmul(feature, feature, transpose_y=True) * 0.5
inner_product = inner_product.clip(min=-50, max=50)
@@ -89,4 +94,3 @@ def forward(self, input, label):
L2 = (sigmoid(inner_product) - sigmoid(inner_product_)).pow(2).mean()
return {"lcdshloss": L1 + self._lambda * L2}
-
diff --git a/paddlers/models/ppcls/loss/googlenetloss.py b/paddlers/models/ppcls/loss/googlenetloss.py
index c580aa61..c26915b0 100644
--- a/paddlers/models/ppcls/loss/googlenetloss.py
+++ b/paddlers/models/ppcls/loss/googlenetloss.py
@@ -19,10 +19,11 @@ class GoogLeNetLoss(nn.Layer):
"""
Cross entropy loss used after googlenet
"""
+
def __init__(self, epsilon=None):
super().__init__()
- assert (epsilon is None or epsilon <= 0 or epsilon >= 1), "googlenet is not support label_smooth"
-
+ assert (epsilon is None or epsilon <= 0 or
+ epsilon >= 1), "googlenet is not support label_smooth"
def forward(self, inputs, label):
input0, input1, input2 = inputs
diff --git a/paddlers/models/ppcls/loss/multilabelloss.py b/paddlers/models/ppcls/loss/multilabelloss.py
index d30d5b8d..4ca76418 100644
--- a/paddlers/models/ppcls/loss/multilabelloss.py
+++ b/paddlers/models/ppcls/loss/multilabelloss.py
@@ -26,11 +26,9 @@ def _labelsmoothing(self, target, class_num):
def _binary_crossentropy(self, input, target, class_num):
if self.epsilon is not None:
target = self._labelsmoothing(target, class_num)
- cost = F.binary_cross_entropy_with_logits(
- logit=input, label=target)
+ cost = F.binary_cross_entropy_with_logits(logit=input, label=target)
else:
- cost = F.binary_cross_entropy_with_logits(
- logit=input, label=target)
+ cost = F.binary_cross_entropy_with_logits(logit=input, label=target)
return cost
diff --git a/paddlers/models/ppcls/loss/pairwisecosface.py b/paddlers/models/ppcls/loss/pairwisecosface.py
index beb80686..8969cf92 100644
--- a/paddlers/models/ppcls/loss/pairwisecosface.py
+++ b/paddlers/models/ppcls/loss/pairwisecosface.py
@@ -36,8 +36,10 @@ def forward(self, embedding, targets):
dist_mat = paddle.matmul(embedding, embedding, transpose_y=True)
N = dist_mat.shape[0]
- is_pos = targets.reshape([N,1]).expand([N,N]).equal(paddle.t(targets.reshape([N,1]).expand([N,N]))).astype('float')
- is_neg = targets.reshape([N,1]).expand([N,N]).not_equal(paddle.t(targets.reshape([N,1]).expand([N,N]))).astype('float')
+ is_pos = targets.reshape([N, 1]).expand([N, N]).equal(
+ paddle.t(targets.reshape([N, 1]).expand([N, N]))).astype('float')
+ is_neg = targets.reshape([N, 1]).expand([N, N]).not_equal(
+ paddle.t(targets.reshape([N, 1]).expand([N, N]))).astype('float')
# Mask scores related to itself
is_pos = is_pos - paddle.eye(N, N)
@@ -48,8 +50,9 @@ def forward(self, embedding, targets):
logit_p = -self.gamma * s_p + (-99999999.) * (1 - is_pos)
logit_n = self.gamma * (s_n + self.margin) + (-99999999.) * (1 - is_neg)
- loss = F.softplus(paddle.logsumexp(logit_p, axis=1) + paddle.logsumexp(logit_n, axis=1)).mean()
-
- return {"PairwiseCosface": loss}
-
+ loss = F.softplus(
+ paddle.logsumexp(
+ logit_p, axis=1) + paddle.logsumexp(
+ logit_n, axis=1)).mean()
+ return {"PairwiseCosface": loss}
diff --git a/paddlers/models/ppcls/loss/supconloss.py b/paddlers/models/ppcls/loss/supconloss.py
index 3dd33bc1..585c0e02 100644
--- a/paddlers/models/ppcls/loss/supconloss.py
+++ b/paddlers/models/ppcls/loss/supconloss.py
@@ -58,8 +58,7 @@ def forward(self, features, labels, mask=None):
elif labels is not None:
labels = labels.reshape([-1, 1])
if labels.shape[0] != batch_size:
- raise ValueError(
- 'Num of labels does not match num of features')
+ raise ValueError('Num of labels does not match num of features')
mask = paddle.cast(
paddle.equal(labels, paddle.t(labels)), 'float32')
else:
diff --git a/paddlers/models/ppcls/metric/__init__.py b/paddlers/models/ppcls/metric/__init__.py
index 94721235..b85bdd57 100644
--- a/paddlers/models/ppcls/metric/__init__.py
+++ b/paddlers/models/ppcls/metric/__init__.py
@@ -34,8 +34,7 @@ def __init__(self, config_list):
metric_name = list(config)[0]
metric_params = config[metric_name]
if metric_params is not None:
- self.metric_func_list.append(
- eval(metric_name)(**metric_params))
+ self.metric_func_list.append(eval(metric_name)(**metric_params))
else:
self.metric_func_list.append(eval(metric_name)())
diff --git a/paddlers/models/ppcls/utils/download.py b/paddlers/models/ppcls/utils/download.py
index 9c457504..e70f2824 100644
--- a/paddlers/models/ppcls/utils/download.py
+++ b/paddlers/models/ppcls/utils/download.py
@@ -120,8 +120,7 @@ def get_path_from_url(url,
# Mainly used to solve the problem of downloading data from different
# machines in the case of multiple machines. Different ips will download
# data, and the same ip will only download data once.
- unique_endpoints = _get_unique_endpoints(ParallelEnv()
- .trainer_endpoints[:])
+ unique_endpoints = _get_unique_endpoints(ParallelEnv().trainer_endpoints[:])
if osp.exists(fullpath) and check_exist and _md5check(fullpath, md5sum):
logger.info("Found {}".format(fullpath))
else:
diff --git a/paddlers/models/ppdet/metrics/json_results.py b/paddlers/models/ppdet/metrics/json_results.py
index 3d7054de..aab0fbec 100755
--- a/paddlers/models/ppdet/metrics/json_results.py
+++ b/paddlers/models/ppdet/metrics/json_results.py
@@ -141,8 +141,8 @@ def get_keypoint_res(results, im_id):
}
x = kpt[0::3]
y = kpt[1::3]
- x0, x1, y0, y1 = np.min(x).item(), np.max(x).item(), np.min(
- y).item(), np.max(y).item()
+ x0, x1, y0, y1 = np.min(x).item(), np.max(x).item(), np.min(y).item(
+ ), np.max(y).item()
ann['area'] = (x1 - x0) * (y1 - y0)
ann['bbox'] = [x0, y0, x1 - x0, y1 - y0]
anns.append(ann)
diff --git a/paddlers/models/ppdet/modeling/architectures/meta_arch.py b/paddlers/models/ppdet/modeling/architectures/meta_arch.py
index 1ab14485..77db52b8 100644
--- a/paddlers/models/ppdet/modeling/architectures/meta_arch.py
+++ b/paddlers/models/ppdet/modeling/architectures/meta_arch.py
@@ -25,8 +25,7 @@ def load_meanstd(self, cfg_transform):
self.scale = 1.
self.mean = paddle.to_tensor([0.485, 0.456, 0.406]).reshape(
(1, 3, 1, 1))
- self.std = paddle.to_tensor([0.229, 0.224, 0.225]).reshape(
- (1, 3, 1, 1))
+ self.std = paddle.to_tensor([0.229, 0.224, 0.225]).reshape((1, 3, 1, 1))
for item in cfg_transform:
if 'NormalizeImage' in item:
self.mean = paddle.to_tensor(item['NormalizeImage'][
@@ -83,8 +82,7 @@ def merge_multi_scale_predictions(self, outs):
nms_threshold = 0.5
keep_top_k = 100
- if self.__class__.__name__ in ('CascadeRCNN', 'FasterRCNN', 'MaskRCNN'
- ):
+ if self.__class__.__name__ in ('CascadeRCNN', 'FasterRCNN', 'MaskRCNN'):
num_classes = self.bbox_head.num_classes
keep_top_k = self.bbox_post_process.nms.keep_top_k
nms_threshold = self.bbox_post_process.nms.nms_threshold
diff --git a/paddlers/models/ppdet/modeling/backbones/vgg.py b/paddlers/models/ppdet/modeling/backbones/vgg.py
index d383677c..1b9e19ac 100755
--- a/paddlers/models/ppdet/modeling/backbones/vgg.py
+++ b/paddlers/models/ppdet/modeling/backbones/vgg.py
@@ -109,12 +109,12 @@ def forward(self, inputs):
@register
@serializable
class VGG(nn.Layer):
- def __init__(
- self,
- depth=16,
- normalizations=[20., -1, -1, -1, -1, -1],
- extra_block_filters=[[256, 512, 1, 2, 3], [128, 256, 1, 2, 3],
- [128, 256, 0, 1, 3], [128, 256, 0, 1, 3]]):
+ def __init__(self,
+ depth=16,
+ normalizations=[20., -1, -1, -1, -1, -1],
+ extra_block_filters=[[256, 512, 1, 2, 3], [128, 256, 1, 2, 3],
+ [128, 256, 0, 1, 3],
+ [128, 256, 0, 1, 3]]):
super(VGG, self).__init__()
assert depth in [16, 19], \
@@ -159,8 +159,8 @@ def __init__(
for i, v in enumerate(self.extra_block_filters):
assert len(v) == 5, "extra_block_filters size not fix"
extra_conv = self.add_sublayer("conv{}".format(6 + i),
- ExtraBlock(last_channels, v[0],
- v[1], v[2], v[3], v[4]))
+ ExtraBlock(last_channels, v[0], v[1],
+ v[2], v[3], v[4]))
last_channels = v[1]
self.extra_convs.append(extra_conv)
self._out_channels.append(last_channels)
diff --git a/paddlers/models/ppdet/modeling/bbox_utils.py b/paddlers/models/ppdet/modeling/bbox_utils.py
index b2ab46cf..d5d376cb 100644
--- a/paddlers/models/ppdet/modeling/bbox_utils.py
+++ b/paddlers/models/ppdet/modeling/bbox_utils.py
@@ -265,8 +265,7 @@ def decode_yolo(box, anchor, downsample_ratio):
"""
x, y, w, h = box
na, grid_h, grid_w = x.shape[1:4]
- grid = make_grid(grid_h, grid_w, x.dtype).reshape(
- (1, 1, grid_h, grid_w, 2))
+ grid = make_grid(grid_h, grid_w, x.dtype).reshape((1, 1, grid_h, grid_w, 2))
x1 = (x + grid[:, :, :, :, 0:1]) / grid_w
y1 = (y + grid[:, :, :, :, 1:2]) / grid_h
@@ -345,8 +344,7 @@ def bbox_iou(box1, box2, giou=False, diou=False, ciou=False, eps=1e-9):
# convex diagonal squared
c2 = cw**2 + ch**2 + eps
# center distance
- rho2 = (
- (px1 + px2 - gx1 - gx2)**2 + (py1 + py2 - gy1 - gy2)**2) / 4
+ rho2 = ((px1 + px2 - gx1 - gx2)**2 + (py1 + py2 - gy1 - gy2)**2) / 4
if diou:
return iou - rho2 / c2
else:
@@ -461,8 +459,8 @@ def rbox2delta(proposals, gt, means=[0, 0, 0, 0, 0], stds=[1, 1, 1, 1, 1]):
coord = gt[..., 0:2] - proposals[..., 0:2]
dx = (np.cos(proposals[..., 4]) * coord[..., 0] + np.sin(proposals[..., 4])
* coord[..., 1]) / proposals_widths
- dy = (-np.sin(proposals[..., 4]) * coord[..., 0] +
- np.cos(proposals[..., 4]) * coord[..., 1]) / proposals_heights
+ dy = (-np.sin(proposals[..., 4]) * coord[..., 0] + np.cos(proposals[..., 4])
+ * coord[..., 1]) / proposals_heights
dw = np.log(gt_widths / proposals_widths)
dh = np.log(gt_heights / proposals_heights)
da = (gt_angle - proposals_angle)
@@ -553,8 +551,7 @@ def norm_angle(angle, range=[-np.pi / 4, np.pi]):
def cal_line_length(point1, point2):
import math
return math.sqrt(
- math.pow(point1[0] - point2[0], 2) + math.pow(point1[1] - point2[1],
- 2))
+ math.pow(point1[0] - point2[0], 2) + math.pow(point1[1] - point2[1], 2))
def get_best_begin_point_single(coordinate):
diff --git a/paddlers/models/ppdet/modeling/layers.py b/paddlers/models/ppdet/modeling/layers.py
index d4c7b791..6a0cca15 100644
--- a/paddlers/models/ppdet/modeling/layers.py
+++ b/paddlers/models/ppdet/modeling/layers.py
@@ -170,12 +170,10 @@ def __init__(self,
norm_lr = 0. if freeze_norm else 1.
param_attr = ParamAttr(
learning_rate=norm_lr,
- regularizer=L2Decay(norm_decay)
- if norm_decay is not None else None)
+ regularizer=L2Decay(norm_decay) if norm_decay is not None else None)
bias_attr = ParamAttr(
learning_rate=norm_lr,
- regularizer=L2Decay(norm_decay)
- if norm_decay is not None else None)
+ regularizer=L2Decay(norm_decay) if norm_decay is not None else None)
if norm_type in ['bn', 'sync_bn']:
self.norm = nn.BatchNorm2D(
ch_out, weight_attr=param_attr, bias_attr=bias_attr)
@@ -293,19 +291,18 @@ def forward(self, x):
@register
@serializable
class AnchorGeneratorSSD(object):
- def __init__(
- self,
- steps=[8, 16, 32, 64, 100, 300],
- aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]],
- min_ratio=15,
- max_ratio=90,
- base_size=300,
- min_sizes=[30.0, 60.0, 111.0, 162.0, 213.0, 264.0],
- max_sizes=[60.0, 111.0, 162.0, 213.0, 264.0, 315.0],
- offset=0.5,
- flip=True,
- clip=False,
- min_max_aspect_ratios_order=False):
+ def __init__(self,
+ steps=[8, 16, 32, 64, 100, 300],
+ aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]],
+ min_ratio=15,
+ max_ratio=90,
+ base_size=300,
+ min_sizes=[30.0, 60.0, 111.0, 162.0, 213.0, 264.0],
+ max_sizes=[60.0, 111.0, 162.0, 213.0, 264.0, 315.0],
+ offset=0.5,
+ flip=True,
+ clip=False,
+ min_max_aspect_ratios_order=False):
self.steps = steps
self.aspect_ratios = aspect_ratios
self.min_ratio = min_ratio
@@ -1035,19 +1032,16 @@ def __call__(self,
seg_masks = paddle.flatten(seg_masks, start_axis=1, stop_axis=-1)
# inter.
- inter_matrix = paddle.mm(seg_masks,
- paddle.transpose(seg_masks, [1, 0]))
+ inter_matrix = paddle.mm(seg_masks, paddle.transpose(seg_masks, [1, 0]))
n_samples = paddle.shape(cate_labels)
# union.
sum_masks_x = paddle.expand(sum_masks, shape=[n_samples, n_samples])
# iou.
iou_matrix = (inter_matrix / (
- sum_masks_x + paddle.transpose(sum_masks_x, [1, 0]) - inter_matrix)
- )
+ sum_masks_x + paddle.transpose(sum_masks_x, [1, 0]) - inter_matrix))
iou_matrix = paddle.triu(iou_matrix, diagonal=1)
# label_specific matrix.
- cate_labels_x = paddle.expand(
- cate_labels, shape=[n_samples, n_samples])
+ cate_labels_x = paddle.expand(cate_labels, shape=[n_samples, n_samples])
label_matrix = paddle.cast(
(cate_labels_x == paddle.transpose(cate_labels_x, [1, 0])),
'float32')
@@ -1304,8 +1298,8 @@ def compute_qkv(self, tensor, index):
if self._qkv_same_embed_dim:
tensor = F.linear(
x=tensor,
- weight=self.in_proj_weight[:, index * self.embed_dim:(
- index + 1) * self.embed_dim],
+ weight=self.in_proj_weight[:, index * self.embed_dim:(index + 1)
+ * self.embed_dim],
bias=self.in_proj_bias[index * self.embed_dim:(index + 1) *
self.embed_dim]
if self.in_proj_bias is not None else None)
diff --git a/paddlers/models/ppdet/modeling/ops.py b/paddlers/models/ppdet/modeling/ops.py
index 552e5224..005a1313 100644
--- a/paddlers/models/ppdet/modeling/ops.py
+++ b/paddlers/models/ppdet/modeling/ops.py
@@ -690,13 +690,12 @@ def yolo_box(
if not isinstance(class_num, int):
raise TypeError("Attr class_num of yolo_box must be an integer")
if not isinstance(conf_thresh, float):
- raise TypeError(
- "Attr ignore_thresh of yolo_box must be a float number")
+ raise TypeError("Attr ignore_thresh of yolo_box must be a float number")
if in_dygraph_mode():
attrs = ('anchors', anchors, 'class_num', class_num, 'conf_thresh',
- conf_thresh, 'downsample_ratio', downsample_ratio,
- 'clip_bbox', clip_bbox, 'scale_x_y', scale_x_y)
+ conf_thresh, 'downsample_ratio', downsample_ratio, 'clip_bbox',
+ clip_bbox, 'scale_x_y', scale_x_y)
boxes, scores = core.ops.yolo_box(x, origin_shape, *attrs)
return boxes, scores
else:
@@ -978,8 +977,8 @@ class number
score_threshold, 'nms_top_k', nms_top_k, 'nms_threshold',
nms_threshold, 'keep_top_k', keep_top_k, 'nms_eta', nms_eta,
'normalized', normalized)
- output, index, nms_rois_num = core.ops.multiclass_nms3(
- bboxes, scores, rois_num, *attrs)
+ output, index, nms_rois_num = core.ops.multiclass_nms3(bboxes, scores,
+ rois_num, *attrs)
if not return_index:
index = None
return output, nms_rois_num, index
@@ -1116,10 +1115,10 @@ def matrix_nms(bboxes,
if in_dygraph_mode():
attrs = ('background_label', background_label, 'score_threshold',
- score_threshold, 'post_threshold', post_threshold,
- 'nms_top_k', nms_top_k, 'gaussian_sigma', gaussian_sigma,
- 'use_gaussian', use_gaussian, 'keep_top_k', keep_top_k,
- 'normalized', normalized)
+ score_threshold, 'post_threshold', post_threshold, 'nms_top_k',
+ nms_top_k, 'gaussian_sigma', gaussian_sigma, 'use_gaussian',
+ use_gaussian, 'keep_top_k', keep_top_k, 'normalized',
+ normalized)
out, index, rois_num = core.ops.matrix_nms(bboxes, scores, *attrs)
if not return_index:
index = None
@@ -1503,9 +1502,9 @@ def generate_proposals(scores,
"""
if in_dygraph_mode():
assert return_rois_num, "return_rois_num should be True in dygraph mode."
- attrs = ('pre_nms_topN', pre_nms_top_n, 'post_nms_topN',
- post_nms_top_n, 'nms_thresh', nms_thresh, 'min_size',
- min_size, 'eta', eta, 'pixel_offset', pixel_offset)
+ attrs = ('pre_nms_topN', pre_nms_top_n, 'post_nms_topN', post_nms_top_n,
+ 'nms_thresh', nms_thresh, 'min_size', min_size, 'eta', eta,
+ 'pixel_offset', pixel_offset)
rpn_rois, rpn_roi_probs, rpn_rois_num = core.ops.generate_proposals_v2(
scores, bbox_deltas, im_shape, anchors, variances, *attrs)
return rpn_rois, rpn_roi_probs, rpn_rois_num
@@ -1575,10 +1574,7 @@ def sigmoid_cross_entropy_with_logits(input,
return output
-def smooth_l1(input,
- label,
- inside_weight=None,
- outside_weight=None,
+def smooth_l1(input, label, inside_weight=None, outside_weight=None,
sigma=None):
input_new = paddle.multiply(input, inside_weight)
label_new = paddle.multiply(label, inside_weight)
diff --git a/paddlers/models/ppdet/modeling/post_process.py b/paddlers/models/ppdet/modeling/post_process.py
index 8922f0f3..b9e556e4 100644
--- a/paddlers/models/ppdet/modeling/post_process.py
+++ b/paddlers/models/ppdet/modeling/post_process.py
@@ -209,7 +209,7 @@ def __call__(self, mask_out, bboxes, bbox_num, origin_shape):
# TODO: support bs > 1 and mask output dtype is bool
pred_result = paddle.zeros(
[num_mask, origin_shape[0][0], origin_shape[0][1]], dtype='int32')
- if bbox_num == 1 and bboxes[0][0] == -1:
+ if (len(bbox_num) == 1 and bbox_num[0] == 1) and bboxes[0][0] == -1:
return pred_result
# TODO: optimize chunk paste
diff --git a/paddlers/models/ppdet/modeling/proposal_generator/anchor_generator.py b/paddlers/models/ppdet/modeling/proposal_generator/anchor_generator.py
index 27b30307..40538a6f 100644
--- a/paddlers/models/ppdet/modeling/proposal_generator/anchor_generator.py
+++ b/paddlers/models/ppdet/modeling/proposal_generator/anchor_generator.py
@@ -77,8 +77,7 @@ def generate_cell_anchors(self, sizes, aspect_ratios):
def _calculate_anchors(self, num_features):
sizes = self._broadcast_params(self.anchor_sizes, num_features)
- aspect_ratios = self._broadcast_params(self.aspect_ratios,
- num_features)
+ aspect_ratios = self._broadcast_params(self.aspect_ratios, num_features)
cell_anchors = [
self.generate_cell_anchors(s, a)
for s, a in zip(sizes, aspect_ratios)
@@ -94,10 +93,7 @@ def _create_grid_offsets(self, size, stride, offset):
shifts_x = paddle.arange(
offset * stride, grid_width * stride, step=stride, dtype='float32')
shifts_y = paddle.arange(
- offset * stride,
- grid_height * stride,
- step=stride,
- dtype='float32')
+ offset * stride, grid_height * stride, step=stride, dtype='float32')
shift_y, shift_x = paddle.meshgrid(shifts_y, shifts_x)
shift_x = paddle.reshape(shift_x, [-1])
shift_y = paddle.reshape(shift_y, [-1])
diff --git a/paddlers/models/ppdet/modeling/proposal_generator/target.py b/paddlers/models/ppdet/modeling/proposal_generator/target.py
index 571ae6c1..b92d0b3d 100644
--- a/paddlers/models/ppdet/modeling/proposal_generator/target.py
+++ b/paddlers/models/ppdet/modeling/proposal_generator/target.py
@@ -40,14 +40,12 @@ def rpn_anchor_target(anchors,
anchors, gt_bbox, rpn_positive_overlap, rpn_negative_overlap, True,
ignore_thresh, is_crowd_i, assign_on_cpu)
# Step2: sample anchor
- fg_inds, bg_inds = subsample_labels(match_labels,
- rpn_batch_size_per_im,
+ fg_inds, bg_inds = subsample_labels(match_labels, rpn_batch_size_per_im,
rpn_fg_fraction, 0, use_random)
# Fill with the ignore label (-1), then set positive and negative labels
labels = paddle.full(match_labels.shape, -1, dtype='int32')
if bg_inds.shape[0] > 0:
- labels = paddle.scatter(labels, bg_inds,
- paddle.zeros_like(bg_inds))
+ labels = paddle.scatter(labels, bg_inds, paddle.zeros_like(bg_inds))
if fg_inds.shape[0] > 0:
labels = paddle.scatter(labels, fg_inds, paddle.ones_like(fg_inds))
# Step3: make output
@@ -261,15 +259,14 @@ def sample_bbox(matches,
paddle.ones_like(gt_classes) * num_classes,
gt_classes)
gt_classes = paddle.where(match_labels == -1,
- paddle.ones_like(gt_classes) * -1,
- gt_classes)
+ paddle.ones_like(gt_classes) * -1, gt_classes)
if is_cascade:
index = paddle.arange(matches.shape[0])
return index, gt_classes
rois_per_image = int(batch_size_per_im)
- fg_inds, bg_inds = subsample_labels(gt_classes, rois_per_image,
- fg_fraction, num_classes, use_random)
+ fg_inds, bg_inds = subsample_labels(gt_classes, rois_per_image, fg_fraction,
+ num_classes, use_random)
if fg_inds.shape[0] == 0 and bg_inds.shape[0] == 0:
# fake output labeled with -1 when all boxes are neither
# foreground nor background
@@ -364,9 +361,7 @@ def generate_mask_target(gt_segms, rois, labels_int32, sampled_gt_inds,
rasterize_polygons_within_box(new_segm[j], boxes[j],
resolution))
else:
- results.append(
- paddle.ones(
- [resolution, resolution], dtype='int32'))
+ results.append(paddle.ones([resolution, resolution], dtype='int32'))
fg_classes = paddle.gather(labels_per_im, fg_inds)
weight = paddle.ones([fg_rois.shape[0]], dtype='float32')
@@ -484,8 +479,8 @@ def libra_sample_neg(max_overlaps,
if floor_thr > 0:
floor_set = set(
np.where(
- np.logical_and(max_overlaps >= 0, max_overlaps <
- floor_thr))[0])
+ np.logical_and(max_overlaps >= 0, max_overlaps < floor_thr))
+ [0])
iou_sampling_set = set(np.where(max_overlaps >= floor_thr)[0])
elif floor_thr == 0:
floor_set = set(np.where(max_overlaps == 0)[0])
@@ -614,8 +609,7 @@ def libra_sample_bbox(matches,
paddle.ones_like(gt_classes) * num_classes,
gt_classes)
gt_classes = paddle.where(match_labels == -1,
- paddle.ones_like(gt_classes) * -1,
- gt_classes)
+ paddle.ones_like(gt_classes) * -1, gt_classes)
sampled_gt_classes = paddle.gather(gt_classes, sampled_inds)
return sampled_inds, sampled_gt_classes
diff --git a/paddlers/models/ppdet/utils/download.py b/paddlers/models/ppdet/utils/download.py
index bdacbf60..2c007870 100644
--- a/paddlers/models/ppdet/utils/download.py
+++ b/paddlers/models/ppdet/utils/download.py
@@ -170,9 +170,9 @@ def get_dataset_path(path, annotation, image_dir):
if _dataset_exists(path, annotation, image_dir):
return path
- logger.info(
- "Dataset {} is not valid for reason above, try searching {} or "
- "downloading dataset...".format(osp.realpath(path), DATASET_HOME))
+ logger.info("Dataset {} is not valid for reason above, try searching {} or "
+ "downloading dataset...".format(
+ osp.realpath(path), DATASET_HOME))
data_name = os.path.split(path.strip().lower())[-1]
for name, dataset in DATASETS.items():
diff --git a/paddlers/models/ppgan/apps/__init__.py b/paddlers/models/ppgan/apps/__init__.py
index 97eee072..c576f43e 100644
--- a/paddlers/models/ppgan/apps/__init__.py
+++ b/paddlers/models/ppgan/apps/__init__.py
@@ -17,4 +17,4 @@
from .drn_predictor import DRNPredictor
from .pan_predictor import PANPredictor
from .lesrcnn_predictor import LESRCNNPredictor
-from .esrgan_predictor import ESRGANPredictor
\ No newline at end of file
+from .esrgan_predictor import ESRGANPredictor
diff --git a/paddlers/models/ppgan/apps/drn_predictor.py b/paddlers/models/ppgan/apps/drn_predictor.py
index 4ee67429..9c30bd5e 100644
--- a/paddlers/models/ppgan/apps/drn_predictor.py
+++ b/paddlers/models/ppgan/apps/drn_predictor.py
@@ -16,7 +16,7 @@
import numpy as np
from PIL import Image
-import paddle
+import paddle
from ppgan.models.generators import DRNGenerator
from ppgan.utils.download import get_path_from_url
from ppgan.utils.logger import get_logger
@@ -25,21 +25,25 @@
REALSR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/models/DRNSx4.pdparams'
+
class DRNPredictor(BasePredictor):
def __init__(self, output='output', weight_path=None):
self.input = input
- self.output = os.path.join(output, 'DRN') # Save path for SR results: output dir + model-name folder
- self.model = DRNGenerator((2, 4)) # Instantiate the model
+ self.output = os.path.join(output,
+ 'DRN') # Save path for SR results: output dir + model-name folder
+ self.model = DRNGenerator((2, 4)) # Instantiate the model
if weight_path is None:
weight_path = get_path_from_url(REALSR_WEIGHT_URL)
- state_dict = paddle.load(weight_path) # Load the weights
- state_dict = state_dict['generator']
+ state_dict = paddle.load(weight_path) # Load the weights
+ state_dict = state_dict['generator']
self.model.load_dict(state_dict)
self.model.eval()
+
# Normalize
def norm(self, img):
img = np.array(img).transpose([2, 0, 1]).astype('float32') / 1.0
return img.astype('float32')
+
# Denormalize
def denorm(self, img):
img = img.transpose((1, 2, 0))
@@ -54,14 +58,15 @@ def run_image(self, img):
elif isinstance(img, Image.Image):
ori_img = img
- img = self.norm(ori_img) # Normalize the image
- x = paddle.to_tensor(img[np.newaxis, ...]) # Convert to a tensor
+ img = self.norm(ori_img) # Normalize the image
+ x = paddle.to_tensor(img[np.newaxis, ...]) # Convert to a tensor
with paddle.no_grad():
- out = self.model(x)[2] # Run inference; DRN outputs three tensors: the original low-resolution image, a 2x upscale, and the 4x result we need
-
+ out = self.model(
+ x
+ )[2] # Run inference; DRN outputs three tensors: the original low-resolution image, a 2x upscale, and the 4x result we need
- pred_img = self.denorm(out.numpy()[0]) # Tensor to numpy array, then denormalize
- pred_img = Image.fromarray(pred_img) # Array to image
+ pred_img = self.denorm(out.numpy()[0]) # Tensor to numpy array, then denormalize
+ pred_img = Image.fromarray(pred_img) # Array to image
return pred_img
# Input image file path
@@ -70,15 +75,15 @@ def run(self, input):
if not os.path.exists(self.output):
os.makedirs(self.output)
- pred_img = self.run_image(input) # Predict on the input image
+ pred_img = self.run_image(input) # Predict on the input image
out_path = None
if self.output:
try:
base_name = os.path.splitext(os.path.basename(input))[0]
except:
base_name = 'result'
- out_path = os.path.join(self.output, base_name + '.png') # Save path
- pred_img.save(out_path) # Save the output image
+ out_path = os.path.join(self.output, base_name + '.png') # Save path
+ pred_img.save(out_path) # Save the output image
logger = get_logger()
logger.info('Image saved to {}'.format(out_path))
diff --git a/paddlers/models/ppgan/apps/esrgan_predictor.py b/paddlers/models/ppgan/apps/esrgan_predictor.py
index 12c9698d..f41dd535 100644
--- a/paddlers/models/ppgan/apps/esrgan_predictor.py
+++ b/paddlers/models/ppgan/apps/esrgan_predictor.py
@@ -28,6 +28,7 @@
SR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/models/esrgan_x4.pdparams'
+
class ESRGANPredictor(BasePredictor):
def __init__(self, output='output', weight_path=None):
self.input = input
@@ -83,4 +84,4 @@ def run(self, input):
logger = get_logger()
logger.info('Image saved to {}'.format(out_path))
- return pred_img, out_path
\ No newline at end of file
+ return pred_img, out_path
diff --git a/paddlers/models/ppgan/apps/lesrcnn_predictor.py b/paddlers/models/ppgan/apps/lesrcnn_predictor.py
index 651dbaa1..b0490304 100644
--- a/paddlers/models/ppgan/apps/lesrcnn_predictor.py
+++ b/paddlers/models/ppgan/apps/lesrcnn_predictor.py
@@ -16,7 +16,7 @@
import numpy as np
from PIL import Image
-import paddle
+import paddle
from ppgan.models.generators import LESRCNNGenerator
from ppgan.utils.download import get_path_from_url
from ppgan.utils.logger import get_logger
@@ -25,21 +25,25 @@
REALSR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/models/lesrcnn_x4.pdparams'
+
class LESRCNNPredictor(BasePredictor):
def __init__(self, output='output', weight_path=None):
self.input = input
- self.output = os.path.join(output, 'LESRCNN') # Save path for SR results: output dir + model-name folder
- self.model = LESRCNNGenerator() # Instantiate the model
+ self.output = os.path.join(output,
+ 'LESRCNN') # Save path for SR results: output dir + model-name folder
+ self.model = LESRCNNGenerator() # Instantiate the model
if weight_path is None:
weight_path = get_path_from_url(REALSR_WEIGHT_URL)
- state_dict = paddle.load(weight_path) # Load the weights
- state_dict = state_dict['generator']
+ state_dict = paddle.load(weight_path) # Load the weights
+ state_dict = state_dict['generator']
self.model.load_dict(state_dict)
self.model.eval()
+
# Normalize
def norm(self, img):
img = np.array(img).transpose([2, 0, 1]).astype('float32') / 255.0
return img.astype('float32')
+
# Denormalize
def denorm(self, img):
img = img.transpose((1, 2, 0))
@@ -54,14 +58,13 @@ def run_image(self, img):
elif isinstance(img, Image.Image):
ori_img = img
- img = self.norm(ori_img) # Normalize the image
- x = paddle.to_tensor(img[np.newaxis, ...]) # Convert to a tensor
+ img = self.norm(ori_img) # Normalize the image
+ x = paddle.to_tensor(img[np.newaxis, ...]) # Convert to a tensor
with paddle.no_grad():
out = self.model(x)
-
- pred_img = self.denorm(out.numpy()[0]) # Tensor to numpy array, then denormalize
- pred_img = Image.fromarray(pred_img) # Array to image
+ pred_img = self.denorm(out.numpy()[0]) # Tensor to numpy array, then denormalize
+ pred_img = Image.fromarray(pred_img) # Array to image
return pred_img
# Input image file path
@@ -70,16 +73,16 @@ def run(self, input):
if not os.path.exists(self.output):
os.makedirs(self.output)
- pred_img = self.run_image(input) # Predict on the input image
+ pred_img = self.run_image(input) # Predict on the input image
out_path = None
if self.output:
try:
base_name = os.path.splitext(os.path.basename(input))[0]
except:
base_name = 'result'
- out_path = os.path.join(self.output, base_name + '.png') # Save path
- pred_img.save(out_path) # Save the output image
+ out_path = os.path.join(self.output, base_name + '.png') # Save path
+ pred_img.save(out_path) # Save the output image
logger = get_logger()
logger.info('Image saved to {}'.format(out_path))
- return pred_img, out_path
\ No newline at end of file
+ return pred_img, out_path
diff --git a/paddlers/models/ppgan/apps/midas/blocks.py b/paddlers/models/ppgan/apps/midas/blocks.py
index bd2c761c..bead2502 100644
--- a/paddlers/models/ppgan/apps/midas/blocks.py
+++ b/paddlers/models/ppgan/apps/midas/blocks.py
@@ -13,10 +13,8 @@ def _make_encoder(backbone,
if backbone == "resnext101_wsl":
# resnext101_wsl
pretrained = _make_pretrained_resnext101_wsl(use_pretrained)
- scratch = _make_scratch([256, 512, 1024, 2048],
- features,
- groups=groups,
- expand=expand)
+ scratch = _make_scratch(
+ [256, 512, 1024, 2048], features, groups=groups, expand=expand)
else:
print(f"Backbone '{backbone}' not implemented")
assert False
@@ -36,34 +34,38 @@ def _make_scratch(in_shape, out_shape, groups=1, expand=False):
out_shape3 = out_shape * 4
out_shape4 = out_shape * 8
- scratch.layer1_rn = nn.Conv2D(in_shape[0],
- out_shape1,
- kernel_size=3,
- stride=1,
- padding=1,
- bias_attr=False,
- groups=groups)
- scratch.layer2_rn = nn.Conv2D(in_shape[1],
- out_shape2,
- kernel_size=3,
- stride=1,
- padding=1,
- bias_attr=False,
- groups=groups)
- scratch.layer3_rn = nn.Conv2D(in_shape[2],
- out_shape3,
- kernel_size=3,
- stride=1,
- padding=1,
- bias_attr=False,
- groups=groups)
- scratch.layer4_rn = nn.Conv2D(in_shape[3],
- out_shape4,
- kernel_size=3,
- stride=1,
- padding=1,
- bias_attr=False,
- groups=groups)
+ scratch.layer1_rn = nn.Conv2D(
+ in_shape[0],
+ out_shape1,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ bias_attr=False,
+ groups=groups)
+ scratch.layer2_rn = nn.Conv2D(
+ in_shape[1],
+ out_shape2,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ bias_attr=False,
+ groups=groups)
+ scratch.layer3_rn = nn.Conv2D(
+ in_shape[2],
+ out_shape3,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ bias_attr=False,
+ groups=groups)
+ scratch.layer4_rn = nn.Conv2D(
+ in_shape[3],
+ out_shape4,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ bias_attr=False,
+ groups=groups)
return scratch
@@ -89,6 +91,7 @@ def _make_pretrained_resnext101_wsl(use_pretrained):
class ResidualConvUnit(nn.Layer):
"""Residual convolution module.
"""
+
def __init__(self, features):
"""Init.
@@ -97,19 +100,21 @@ def __init__(self, features):
"""
super().__init__()
- self.conv1 = nn.Conv2D(features,
- features,
- kernel_size=3,
- stride=1,
- padding=1,
- bias_attr=True)
-
- self.conv2 = nn.Conv2D(features,
- features,
- kernel_size=3,
- stride=1,
- padding=1,
- bias_attr=True)
+ self.conv1 = nn.Conv2D(
+ features,
+ features,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ bias_attr=True)
+
+ self.conv2 = nn.Conv2D(
+ features,
+ features,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ bias_attr=True)
self.relu = nn.ReLU()
@@ -133,6 +138,7 @@ def forward(self, x):
class FeatureFusionBlock(nn.Layer):
"""Feature fusion block.
"""
+
def __init__(self, features):
"""Init.
@@ -156,9 +162,7 @@ def forward(self, *xs):
output += self.resConfUnit1(xs[1])
output = self.resConfUnit2(output)
- output = nn.functional.interpolate(output,
- scale_factor=2,
- mode="bilinear",
- align_corners=True)
+ output = nn.functional.interpolate(
+ output, scale_factor=2, mode="bilinear", align_corners=True)
return output
diff --git a/paddlers/models/ppgan/apps/midas/midas_net.py b/paddlers/models/ppgan/apps/midas/midas_net.py
index ef0a00ca..b774ff66 100644
--- a/paddlers/models/ppgan/apps/midas/midas_net.py
+++ b/paddlers/models/ppgan/apps/midas/midas_net.py
@@ -22,6 +22,7 @@ def load(self, path):
class MidasNet(BaseModel):
"""Network for monocular depth estimation.
"""
+
def __init__(self, path=None, features=256, non_negative=True):
"""Init.
@@ -47,11 +48,15 @@ def __init__(self, path=None, features=256, non_negative=True):
self.scratch.refinenet1 = FeatureFusionBlock(features)
output_conv = [
- nn.Conv2D(features, 128, kernel_size=3, stride=1, padding=1),
- nn.Upsample(scale_factor=2, mode="bilinear"),
- nn.Conv2D(128, 32, kernel_size=3, stride=1, padding=1),
+ nn.Conv2D(
+ features, 128, kernel_size=3, stride=1, padding=1),
+ nn.Upsample(
+ scale_factor=2, mode="bilinear"),
+ nn.Conv2D(
+ 128, 32, kernel_size=3, stride=1, padding=1),
nn.ReLU(),
- nn.Conv2D(32, 1, kernel_size=1, stride=1, padding=0),
+ nn.Conv2D(
+ 32, 1, kernel_size=1, stride=1, padding=0),
nn.ReLU() if non_negative else nn.Identity(),
]
if non_negative:
diff --git a/paddlers/models/ppgan/apps/midas/transforms.py b/paddlers/models/ppgan/apps/midas/transforms.py
index 530c552d..2a5bb984 100644
--- a/paddlers/models/ppgan/apps/midas/transforms.py
+++ b/paddlers/models/ppgan/apps/midas/transforms.py
@@ -8,6 +8,7 @@
class Resize(object):
"""Resize sample to given size (width, height).
"""
+
def __init__(self,
width,
height,
@@ -96,15 +97,15 @@ def get_size(self, width, height):
f"resize_method {self.__resize_method} not implemented")
if self.__resize_method == "lower_bound":
- new_height = self.constrain_to_multiple_of(scale_height * height,
- min_val=self.__height)
- new_width = self.constrain_to_multiple_of(scale_width * width,
- min_val=self.__width)
+ new_height = self.constrain_to_multiple_of(
+ scale_height * height, min_val=self.__height)
+ new_width = self.constrain_to_multiple_of(
+ scale_width * width, min_val=self.__width)
elif self.__resize_method == "upper_bound":
- new_height = self.constrain_to_multiple_of(scale_height * height,
- max_val=self.__height)
- new_width = self.constrain_to_multiple_of(scale_width * width,
- max_val=self.__width)
+ new_height = self.constrain_to_multiple_of(
+ scale_height * height, max_val=self.__height)
+ new_width = self.constrain_to_multiple_of(
+ scale_width * width, max_val=self.__width)
elif self.__resize_method == "minimal":
new_height = self.constrain_to_multiple_of(scale_height * height)
new_width = self.constrain_to_multiple_of(scale_width * width)
@@ -122,26 +123,24 @@ def __call__(self, sample):
sample["image"] = cv2.resize(
sample["image"],
(width, height),
- interpolation=self.__image_interpolation_method,
- )
+ interpolation=self.__image_interpolation_method, )
if self.__resize_target:
if "disparity" in sample:
sample["disparity"] = cv2.resize(
sample["disparity"],
(width, height),
- interpolation=cv2.INTER_NEAREST,
- )
+ interpolation=cv2.INTER_NEAREST, )
if "depth" in sample:
- sample["depth"] = cv2.resize(sample["depth"], (width, height),
- interpolation=cv2.INTER_NEAREST)
+ sample["depth"] = cv2.resize(
+ sample["depth"], (width, height),
+ interpolation=cv2.INTER_NEAREST)
sample["mask"] = cv2.resize(
sample["mask"].astype(np.float32),
(width, height),
- interpolation=cv2.INTER_NEAREST,
- )
+ interpolation=cv2.INTER_NEAREST, )
sample["mask"] = sample["mask"].astype(bool)
return sample
@@ -150,6 +149,7 @@ def __call__(self, sample):
class NormalizeImage(object):
"""Normlize image by given mean and std.
"""
+
def __init__(self, mean, std):
self.__mean = mean
self.__std = std
@@ -163,6 +163,7 @@ def __call__(self, sample):
class PrepareForNet(object):
"""Prepare sample for usage as network input.
"""
+
def __init__(self):
pass
diff --git a/paddlers/models/ppgan/apps/midas/utils.py b/paddlers/models/ppgan/apps/midas/utils.py
index 3054a491..5a8b939f 100644
--- a/paddlers/models/ppgan/apps/midas/utils.py
+++ b/paddlers/models/ppgan/apps/midas/utils.py
@@ -26,8 +26,8 @@ def write_pfm(path, image, scale=1):
if len(image.shape) == 3 and image.shape[2] == 3: # color image
color = True
- elif (len(image.shape) == 2
- or len(image.shape) == 3 and image.shape[2] == 1): # greyscale
+ elif (len(image.shape) == 2 or len(image.shape) == 3 and
+ image.shape[2] == 1): # greyscale
color = False
else:
raise Exception(
diff --git a/paddlers/models/ppgan/apps/mpr_predictor.py b/paddlers/models/ppgan/apps/mpr_predictor.py
index 40a9fc32..9c84178a 100644
--- a/paddlers/models/ppgan/apps/mpr_predictor.py
+++ b/paddlers/models/ppgan/apps/mpr_predictor.py
@@ -91,10 +91,10 @@ def __init__(self,
def get_images(self, images_path):
if os.path.isdir(images_path):
return natsorted(
- glob(os.path.join(images_path, '*.jpg')) +
- glob(os.path.join(images_path, '*.JPG')) +
- glob(os.path.join(images_path, '*.png')) +
- glob(os.path.join(images_path, '*.PNG')))
+ glob(os.path.join(images_path, '*.jpg')) + glob(
+ os.path.join(images_path, '*.JPG')) + glob(
+ os.path.join(images_path, '*.png')) + glob(
+ os.path.join(images_path, '*.PNG')))
else:
return [images_path]
diff --git a/paddlers/models/ppgan/apps/pan_predictor.py b/paddlers/models/ppgan/apps/pan_predictor.py
index ddb6b26d..27cb9263 100644
--- a/paddlers/models/ppgan/apps/pan_predictor.py
+++ b/paddlers/models/ppgan/apps/pan_predictor.py
@@ -16,7 +16,7 @@
import numpy as np
from PIL import Image
-import paddle
+import paddle
from ppgan.models.generators import PAN
from ppgan.utils.download import get_path_from_url
from ppgan.utils.logger import get_logger
@@ -25,21 +25,25 @@
REALSR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/models/pan_x4.pdparams'
+
class PANPredictor(BasePredictor):
def __init__(self, output='output', weight_path=None):
self.input = input
- self.output = os.path.join(output, 'PAN') # Save path for SR results: output dir + model-name folder
- self.model = PAN(3, 3, 40, 24, 16) # Instantiate the model
+ self.output = os.path.join(output,
+ 'PAN') # Save path for SR results: output dir + model-name folder
+ self.model = PAN(3, 3, 40, 24, 16) # Instantiate the model
if weight_path is None:
weight_path = get_path_from_url(REALSR_WEIGHT_URL)
- state_dict = paddle.load(weight_path) # Load the weights
- state_dict = state_dict['generator']
+ state_dict = paddle.load(weight_path) # Load the weights
+ state_dict = state_dict['generator']
self.model.load_dict(state_dict)
self.model.eval()
+
# Normalize
def norm(self, img):
img = np.array(img).transpose([2, 0, 1]).astype('float32') / 255.0
return img.astype('float32')
+
# Denormalize
def denorm(self, img):
img = img.transpose((1, 2, 0))
@@ -54,14 +58,13 @@ def run_image(self, img):
elif isinstance(img, Image.Image):
ori_img = img
- img = self.norm(ori_img) # Normalize the image
- x = paddle.to_tensor(img[np.newaxis, ...]) # Convert to a tensor
+ img = self.norm(ori_img) # Normalize the image
+ x = paddle.to_tensor(img[np.newaxis, ...]) # Convert to a tensor
with paddle.no_grad():
out = self.model(x)
-
- pred_img = self.denorm(out.numpy()[0]) # Tensor to numpy array, then denormalize
- pred_img = Image.fromarray(pred_img) # Array to image
+ pred_img = self.denorm(out.numpy()[0]) # Tensor to numpy array, then denormalize
+ pred_img = Image.fromarray(pred_img) # Array to image
return pred_img
# Input image file path
@@ -70,17 +73,16 @@ def run(self, input):
if not os.path.exists(self.output):
os.makedirs(self.output)
- pred_img = self.run_image(input) # Predict on the input image
+ pred_img = self.run_image(input) # Predict on the input image
out_path = None
if self.output:
try:
base_name = os.path.splitext(os.path.basename(input))[0]
except:
base_name = 'result'
- out_path = os.path.join(self.output, base_name + '.png') # Save path
- pred_img.save(out_path) # Save the output image
+ out_path = os.path.join(self.output, base_name + '.png') # Save path
+ pred_img.save(out_path) # Save the output image
logger = get_logger()
logger.info('Image saved to {}'.format(out_path))
return pred_img, out_path
-
diff --git a/paddlers/models/ppgan/datasets/animeganv2_dataset.py b/paddlers/models/ppgan/datasets/animeganv2_dataset.py
index 7f70ef8b..57a293e7 100644
--- a/paddlers/models/ppgan/datasets/animeganv2_dataset.py
+++ b/paddlers/models/ppgan/datasets/animeganv2_dataset.py
@@ -27,6 +27,7 @@
class AnimeGANV2Dataset(paddle.io.Dataset):
"""
"""
+
def __init__(self,
dataroot,
style,
@@ -50,18 +51,14 @@ def __init__(self,
self.anime_root = os.path.join(self.root, f'{self.style}', 'style')
self.smooth_root = os.path.join(self.root, f'{self.style}', 'smooth')
- self.real = ImageFolder(self.real_root,
- transform=self.transform_real,
- loader=self.loader)
- self.anime = ImageFolder(self.anime_root,
- transform=self.transform_anime,
- loader=self.loader)
- self.anime_gray = ImageFolder(self.anime_root,
- transform=self.transform_gray,
- loader=self.loader)
- self.smooth_gray = ImageFolder(self.smooth_root,
- transform=self.transform_gray,
- loader=self.loader)
+ self.real = ImageFolder(
+ self.real_root, transform=self.transform_real, loader=self.loader)
+ self.anime = ImageFolder(
+ self.anime_root, transform=self.transform_anime, loader=self.loader)
+ self.anime_gray = ImageFolder(
+ self.anime_root, transform=self.transform_gray, loader=self.loader)
+ self.smooth_gray = ImageFolder(
+ self.smooth_root, transform=self.transform_gray, loader=self.loader)
self.sizes = [
len(fold) for fold in [self.real, self.anime, self.smooth_gray]
]
@@ -70,8 +67,9 @@ def __init__(self,
@staticmethod
def loader(path):
- return cv2.cvtColor(cv2.imread(path, flags=cv2.IMREAD_COLOR),
- cv2.COLOR_BGR2RGB)
+ return cv2.cvtColor(
+ cv2.imread(
+ path, flags=cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB)
def reshuffle(self):
indexs = []
diff --git a/paddlers/models/ppgan/datasets/base_dataset.py b/paddlers/models/ppgan/datasets/base_dataset.py
index 229c8e23..c5535441 100644
--- a/paddlers/models/ppgan/datasets/base_dataset.py
+++ b/paddlers/models/ppgan/datasets/base_dataset.py
@@ -57,9 +57,8 @@ def _scandir(dir_path, suffix, recursive):
yield rel_path
else:
if recursive:
- yield from _scandir(entry.path,
- suffix=suffix,
- recursive=recursive)
+ yield from _scandir(
+ entry.path, suffix=suffix, recursive=recursive)
else:
continue
@@ -79,6 +78,7 @@ class BaseDataset(Dataset, metaclass=ABCMeta):
preprocess (list[dict]): A sequence of data preprocess config.
"""
+
def __init__(self, preprocess=None):
super(BaseDataset, self).__init__()
diff --git a/paddlers/models/ppgan/datasets/base_sr_dataset.py b/paddlers/models/ppgan/datasets/base_sr_dataset.py
index 306ad9ad..43352bc5 100644
--- a/paddlers/models/ppgan/datasets/base_sr_dataset.py
+++ b/paddlers/models/ppgan/datasets/base_sr_dataset.py
@@ -23,6 +23,7 @@
@DATASETS.register()
class SRDataset(BaseDataset):
"""Base super resulotion dataset for image restoration."""
+
def __init__(self,
lq_folder,
gt_folder,
diff --git a/paddlers/models/ppgan/datasets/builder.py b/paddlers/models/ppgan/datasets/builder.py
index 9ee1f41c..cba6ee41 100644
--- a/paddlers/models/ppgan/datasets/builder.py
+++ b/paddlers/models/ppgan/datasets/builder.py
@@ -48,21 +48,24 @@ def build_dataloader(cfg, is_train=True, distributed=True):
dataset = build_dataset(cfg_)
if distributed:
- sampler = DistributedBatchSampler(dataset,
- batch_size=batch_size,
- shuffle=True if is_train else False,
- drop_last=True if is_train else False)
+ sampler = DistributedBatchSampler(
+ dataset,
+ batch_size=batch_size,
+ shuffle=True if is_train else False,
+ drop_last=True if is_train else False)
- dataloader = paddle.io.DataLoader(dataset,
- batch_sampler=sampler,
- num_workers=num_workers,
- use_shared_memory=use_shared_memory)
+ dataloader = paddle.io.DataLoader(
+ dataset,
+ batch_sampler=sampler,
+ num_workers=num_workers,
+ use_shared_memory=use_shared_memory)
else:
- dataloader = paddle.io.DataLoader(dataset,
- batch_size=batch_size,
- shuffle=True if is_train else False,
- drop_last=True if is_train else False,
- use_shared_memory=use_shared_memory,
- num_workers=num_workers)
+ dataloader = paddle.io.DataLoader(
+ dataset,
+ batch_size=batch_size,
+ shuffle=True if is_train else False,
+ drop_last=True if is_train else False,
+ use_shared_memory=use_shared_memory,
+ num_workers=num_workers)
return dataloader
diff --git a/paddlers/models/ppgan/datasets/common_vision_dataset.py b/paddlers/models/ppgan/datasets/common_vision_dataset.py
index 8b039265..5996add5 100644
--- a/paddlers/models/ppgan/datasets/common_vision_dataset.py
+++ b/paddlers/models/ppgan/datasets/common_vision_dataset.py
@@ -25,6 +25,7 @@ class CommonVisionDataset(paddle.io.Dataset):
"""
Dataset for using paddle vision default datasets, such as mnist, flowers.
"""
+
def __init__(self,
dataset_name,
transforms=None,
diff --git a/paddlers/models/ppgan/datasets/firstorder_dataset.py b/paddlers/models/ppgan/datasets/firstorder_dataset.py
index 31749b4f..5660b781 100644
--- a/paddlers/models/ppgan/datasets/firstorder_dataset.py
+++ b/paddlers/models/ppgan/datasets/firstorder_dataset.py
@@ -48,13 +48,13 @@ def __init__(self, **cfg):
file_idx_set = list(file_idx_set)
if len(file_idx_set) != 0:
if POOL_SIZE == 0:
- for idx in tqdm.tqdm(file_idx_set,
- desc='Extracting frames'):
+ for idx in tqdm.tqdm(
+ file_idx_set, desc='Extracting frames'):
_ = self.frameDataset[idx]
else:
# multiprocessing
- bar = tqdm.tqdm(total=len(file_idx_set),
- desc='Extracting frames')
+ bar = tqdm.tqdm(
+ total=len(file_idx_set), desc='Extracting frames')
with Pool(POOL_SIZE) as pl:
_p = 0
while _p <= len(file_idx_set) - 1:
@@ -90,10 +90,10 @@ def read_video(name: Path, frame_shape=tuple([256, 256, 3]), saveto='folder'):
- folder with videos
"""
if name.is_dir():
- frames = sorted(name.iterdir(),
- key=lambda x: int(x.with_suffix('').name))
- video_array = np.array([imread(path) for path in frames],
- dtype='float32')
+ frames = sorted(
+ name.iterdir(), key=lambda x: int(x.with_suffix('').name))
+ video_array = np.array(
+ [imread(path) for path in frames], dtype='float32')
return video_array
elif name.suffix.lower() in ['.gif', '.mp4', '.mov']:
try:
@@ -123,7 +123,8 @@ def read_video(name: Path, frame_shape=tuple([256, 256, 3]), saveto='folder'):
except FileExistsError:
pass
for idx, img in enumerate(video_array_reshape):
- cv2.imwrite(str(sub_dir.joinpath('%i.png' % idx)), img[:,:,[2,1,0]])
+ cv2.imwrite(
+ str(sub_dir.joinpath('%i.png' % idx)), img[:, :, [2, 1, 0]])
name.unlink()
return video_array_reshape
else:
@@ -138,6 +139,7 @@ class FramesDataset(Dataset):
- folder with all frames
FramesDataset[i]: obtain sample from i-th video in self.videos
"""
+
def __init__(self, cfg):
self.root_dir = Path(cfg['dataroot'])
self.videos = None
@@ -161,8 +163,8 @@ def __init__(self, cfg):
else:
train_videos = list(self.root_dir.joinpath('train').iterdir())
test_videos = list(self.root_dir.joinpath('test').iterdir())
- self.root_dir = self.root_dir.joinpath(
- 'train' if self.is_train else 'test')
+ self.root_dir = self.root_dir.joinpath('train'
+ if self.is_train else 'test')
if self.is_train:
self.videos = train_videos
@@ -184,23 +186,22 @@ def __getitem__(self, idx):
path = self.videos[idx]
video_name = path.name
if self.is_train and path.is_dir():
- frames = sorted(path.iterdir(),
- key=lambda x: int(x.with_suffix('').name))
+ frames = sorted(
+ path.iterdir(), key=lambda x: int(x.with_suffix('').name))
num_frames = len(frames)
frame_idx = np.sort(
- np.random.choice(num_frames, replace=True, size=2))
+ np.random.choice(
+ num_frames, replace=True, size=2))
video_array = [imread(str(frames[idx])) for idx in frame_idx]
else:
if self.create_frames_folder:
- video_array = read_video(path,
- frame_shape=self.frame_shape,
- saveto='folder')
+ video_array = read_video(
+ path, frame_shape=self.frame_shape, saveto='folder')
self.videos[idx] = path.with_suffix(
'') # rename /xx/xx/xx.gif -> /xx/xx/xx
else:
- video_array = read_video(path,
- frame_shape=self.frame_shape,
- saveto=None)
+ video_array = read_video(
+ path, frame_shape=self.frame_shape, saveto=None)
num_frames = len(video_array)
frame_idx = np.sort(
np.random.choice(
@@ -220,13 +221,14 @@ def __getitem__(self, idx):
if self.is_train:
if self.transform is not None: #modify
t = self.transform(tuple(video_array))
- out['driving'] = t[0].transpose(2, 0, 1).astype(
- np.float32) / 255.0
- out['source'] = t[1].transpose(2, 0, 1).astype(
- np.float32) / 255.0
+ out['driving'] = t[0].transpose(2, 0,
+ 1).astype(np.float32) / 255.0
+ out['source'] = t[1].transpose(2, 0,
+ 1).astype(np.float32) / 255.0
else:
- source = np.array(video_array[0],
- dtype='float32') / 255.0 # shape is [H, W, C]
+ source = np.array(
+ video_array[0],
+ dtype='float32') / 255.0 # shape is [H, W, C]
driving = np.array(
video_array[1],
dtype='float32') / 255.0 # shape is [H, W, C]
@@ -250,6 +252,7 @@ class DatasetRepeater(Dataset):
"""
Pass several times over the same dataset for better i/o performance
"""
+
def __init__(self, dataset, num_repeats=100):
self.dataset = dataset
self.num_repeats = num_repeats
diff --git a/paddlers/models/ppgan/datasets/image_folder.py b/paddlers/models/ppgan/datasets/image_folder.py
index a9eec8aa..12d6a6f1 100644
--- a/paddlers/models/ppgan/datasets/image_folder.py
+++ b/paddlers/models/ppgan/datasets/image_folder.py
@@ -70,8 +70,8 @@ def __init__(self,
imgs = make_dataset(root)
if len(imgs) == 0:
raise (RuntimeError("Found 0 images in: " + root + "\n"
- "Supported image extensions are: " +
- ",".join(IMG_EXTENSIONS)))
+ "Supported image extensions are: " + ",".join(
+ IMG_EXTENSIONS)))
self.root = root
self.imgs = imgs
diff --git a/paddlers/models/ppgan/datasets/paired_dataset.py b/paddlers/models/ppgan/datasets/paired_dataset.py
index 503d9202..70a917e2 100644
--- a/paddlers/models/ppgan/datasets/paired_dataset.py
+++ b/paddlers/models/ppgan/datasets/paired_dataset.py
@@ -20,6 +20,7 @@
class PairedDataset(BaseDataset):
"""A dataset class for paired image dataset.
"""
+
def __init__(self, dataroot, preprocess):
"""Initialize this dataset class.
diff --git a/paddlers/models/ppgan/datasets/preprocess/__init__.py b/paddlers/models/ppgan/datasets/preprocess/__init__.py
index 1712224e..ee1aa3e3 100644
--- a/paddlers/models/ppgan/datasets/preprocess/__init__.py
+++ b/paddlers/models/ppgan/datasets/preprocess/__init__.py
@@ -1,8 +1,7 @@
from .io import LoadImageFromFile, ReadImageSequence, GetNeighboringFramesIdx
-from .transforms import (PairedRandomCrop, PairedRandomHorizontalFlip,
- PairedRandomVerticalFlip, PairedRandomTransposeHW,
- SRPairedRandomCrop, SplitPairedImage, SRNoise,
- NormalizeSequence, MirrorVideoSequence,
- TransposeSequence)
+from .transforms import (
+ PairedRandomCrop, PairedRandomHorizontalFlip, PairedRandomVerticalFlip,
+ PairedRandomTransposeHW, SRPairedRandomCrop, SplitPairedImage, SRNoise,
+ NormalizeSequence, MirrorVideoSequence, TransposeSequence)
from .builder import build_preprocess
diff --git a/paddlers/models/ppgan/datasets/preprocess/builder.py b/paddlers/models/ppgan/datasets/preprocess/builder.py
index bb6c7dec..eaf499d7 100644
--- a/paddlers/models/ppgan/datasets/preprocess/builder.py
+++ b/paddlers/models/ppgan/datasets/preprocess/builder.py
@@ -35,6 +35,7 @@ class Compose(object):
object will call each given :attr:`transforms` sequentially.
"""
+
def __init__(self, functions):
self.functions = functions
diff --git a/paddlers/models/ppgan/datasets/preprocess/io.py b/paddlers/models/ppgan/datasets/preprocess/io.py
index d8ce34e4..bd35a9dd 100644
--- a/paddlers/models/ppgan/datasets/preprocess/io.py
+++ b/paddlers/models/ppgan/datasets/preprocess/io.py
@@ -18,6 +18,7 @@ class LoadImageFromFile(object):
`datas` dict with name of `f'ori_{key}'`. Default: False.
kwargs (dict): Args for file client.
"""
+
def __init__(self,
key='image',
flag=-1,
@@ -74,6 +75,7 @@ class ReadImageSequence(LoadImageFromFile):
`datas` dict with name of `f'ori_{key}'`. Default: False.
kwargs (dict): Args for file client.
"""
+
def __call__(self, datas):
"""Call function.
@@ -130,6 +132,7 @@ class GetNeighboringFramesIdx:
sequence. Default: 0.
filename_tmpl (str): Template for file name. Default: '{:08d}.png'.
"""
+
def __init__(self, interval_list, start_idx=0, filename_tmpl='{:08d}.png'):
self.interval_list = interval_list
self.filename_tmpl = filename_tmpl
diff --git a/paddlers/models/ppgan/datasets/preprocess/transforms.py b/paddlers/models/ppgan/datasets/preprocess/transforms.py
index 3064bb39..00a7128a 100644
--- a/paddlers/models/ppgan/datasets/preprocess/transforms.py
+++ b/paddlers/models/ppgan/datasets/preprocess/transforms.py
@@ -61,8 +61,8 @@ def __call__(self, datas):
data = tuple(data)
for transform in self.transforms:
data = transform(data)
- if hasattr(transform, 'params') and isinstance(
- transform.params, dict):
+ if hasattr(transform, 'params') and isinstance(transform.params,
+ dict):
datas.update(transform.params)
if len(self.input_keys) > 1:
@@ -176,6 +176,7 @@ class PairedRandomTransposeHW(T.BaseTransform):
prob (float): The probability to transpose the images.
keys (list[str]): The images to be transposed.
"""
+
def __init__(self, prob=0.5, keys=None):
self.keys = keys
self.prob = prob
@@ -220,6 +221,7 @@ class TransposeSequence(T.Transpose):
fake_img_seq = transform(fake_img_seq)
"""
+
def _apply_image(self, img):
if isinstance(img, list):
imgs = []
@@ -277,6 +279,7 @@ class NormalizeSequence(T.Normalize):
fake_img_seq = normalize_seq(fake_img_seq)
"""
+
def _apply_image(self, img):
if isinstance(img, list):
imgs = [
@@ -302,6 +305,7 @@ class SRPairedRandomCrop(T.BaseTransform):
scale (int): model upscale factor.
gt_patch_size (int): cropped gt patch size.
"""
+
def __init__(self, scale, gt_patch_size, scale_list=False, keys=None):
self.gt_patch_size = gt_patch_size
self.scale = scale
@@ -339,16 +343,16 @@ def __call__(self, inputs):
]
top_gt, left_gt = int(top * scale), int(left * scale)
gt = [
- v[top_gt:top_gt + self.gt_patch_size,
- left_gt:left_gt + self.gt_patch_size, ...] for v in gt
+ v[top_gt:top_gt + self.gt_patch_size, left_gt:left_gt +
+ self.gt_patch_size, ...] for v in gt
]
else:
# crop lq patch
lq = lq[top:top + lq_patch_size, left:left + lq_patch_size, ...]
# crop corresponding gt patch
top_gt, left_gt = int(top * scale), int(left * scale)
- gt = gt[top_gt:top_gt + self.gt_patch_size,
- left_gt:left_gt + self.gt_patch_size, ...]
+ gt = gt[top_gt:top_gt + self.gt_patch_size, left_gt:left_gt +
+ self.gt_patch_size, ...]
if self.scale_list and self.scale == 4:
lqx2 = F.resize(gt, (lq_patch_size * 2, lq_patch_size * 2),
@@ -368,14 +372,14 @@ class SRNoise(T.BaseTransform):
noise_path (str): directory of noise image.
size (int): cropped noise patch size.
"""
+
def __init__(self, noise_path, size, keys=None):
self.noise_path = noise_path
self.noise_imgs = sorted(glob.glob(noise_path + '*.png'))
self.size = size
self.keys = keys
self.transform = T.Compose([
- T.RandomCrop(size),
- T.Transpose(),
+ T.RandomCrop(size), T.Transpose(),
T.Normalize([0., 0., 0.], [255., 255., 255.])
])
@@ -396,6 +400,7 @@ class RandomResizedCropProb(T.RandomResizedCrop):
prob (float): probability of using random-resized cropping.
size (int): cropped size.
"""
+
def __init__(self, prob, size, scale, ratio, interpolation, keys=None):
super().__init__(size, scale, ratio, interpolation)
self.prob = prob
@@ -480,21 +485,14 @@ def _apply_image(self, image):
@TRANSFORMS.register()
class PairedColorJitter(T.BaseTransform):
- def __init__(self,
- brightness=0,
- contrast=0,
- saturation=0,
- hue=0,
+ def __init__(self, brightness=0, contrast=0, saturation=0, hue=0,
keys=None):
super().__init__(keys=keys)
self.brightness = T.transforms._check_input(brightness, 'brightness')
self.contrast = T.transforms._check_input(contrast, 'contrast')
self.saturation = T.transforms._check_input(saturation, 'saturation')
- self.hue = T.transforms._check_input(hue,
- 'hue',
- center=0,
- bound=(-0.5, 0.5),
- clip_first_on_zero=False)
+ self.hue = T.transforms._check_input(
+ hue, 'hue', center=0, bound=(-0.5, 0.5), clip_first_on_zero=False)
def _get_params(self, input):
"""Get a randomized transform to be applied on image.
@@ -545,6 +543,7 @@ class MirrorVideoSequence:
Args:
keys (list[str]): The frame lists to be extended.
"""
+
def __init__(self, keys=None):
self.keys = keys
diff --git a/paddlers/models/ppgan/datasets/single_dataset.py b/paddlers/models/ppgan/datasets/single_dataset.py
index 98661567..29f4259c 100644
--- a/paddlers/models/ppgan/datasets/single_dataset.py
+++ b/paddlers/models/ppgan/datasets/single_dataset.py
@@ -20,6 +20,7 @@
class SingleDataset(BaseDataset):
"""
"""
+
def __init__(self, dataroot, preprocess):
"""Initialize single dataset class.
diff --git a/paddlers/models/ppgan/datasets/starganv2_dataset.py b/paddlers/models/ppgan/datasets/starganv2_dataset.py
index 0985b13c..39fa232a 100644
--- a/paddlers/models/ppgan/datasets/starganv2_dataset.py
+++ b/paddlers/models/ppgan/datasets/starganv2_dataset.py
@@ -107,6 +107,7 @@ def __len__(self):
class StarGANv2Dataset(BaseDataset):
"""
"""
+
def __init__(self, dataroot, is_train, preprocess, test_count=0):
"""Initialize single dataset class.
@@ -125,10 +126,10 @@ def __init__(self, dataroot, is_train, preprocess, test_count=0):
else:
files = os.listdir(self.dataroot)
if 'src' in files and 'ref' in files:
- self.src_loader = ImageFolder(os.path.join(
- self.dataroot, 'src'))
- self.ref_loader = ImageFolder(os.path.join(
- self.dataroot, 'ref'))
+ self.src_loader = ImageFolder(
+ os.path.join(self.dataroot, 'src'))
+ self.ref_loader = ImageFolder(
+ os.path.join(self.dataroot, 'ref'))
else:
self.src_loader = ImageFolder(self.dataroot)
self.ref_loader = ImageFolder(self.dataroot)
diff --git a/paddlers/models/ppgan/datasets/unpaired_dataset.py b/paddlers/models/ppgan/datasets/unpaired_dataset.py
index b55cb7c7..a49767aa 100644
--- a/paddlers/models/ppgan/datasets/unpaired_dataset.py
+++ b/paddlers/models/ppgan/datasets/unpaired_dataset.py
@@ -23,6 +23,7 @@
class UnpairedDataset(BaseDataset):
"""
"""
+
def __init__(self, dataroot_a, dataroot_b, max_size, is_train, preprocess):
"""Initialize unpaired dataset class.
diff --git a/paddlers/models/ppgan/engine/trainer.py b/paddlers/models/ppgan/engine/trainer.py
index 9184e641..74ecf21e 100644
--- a/paddlers/models/ppgan/engine/trainer.py
+++ b/paddlers/models/ppgan/engine/trainer.py
@@ -29,6 +29,7 @@
from ..utils.timer import TimeAverager
from ..utils.profiler import add_profiler_step
+
class IterLoader:
def __init__(self, dataloader):
self._dataloader = dataloader
@@ -71,6 +72,7 @@ class Trainer:
# | ||
# save checkpoint (model.nets) \/
"""
+
def __init__(self, cfg):
# base config
self.logger = logging.getLogger(__name__)
@@ -220,8 +222,8 @@ def train(self):
def test(self):
if not hasattr(self, 'test_dataloader'):
- self.test_dataloader = build_dataloader(self.cfg.dataset.test,
- is_train=False)
+ self.test_dataloader = build_dataloader(
+ self.cfg.dataset.test, is_train=False)
iter_loader = IterLoader(self.test_dataloader)
if self.max_eval_steps is None:
self.max_eval_steps = len(self.test_dataloader)
@@ -235,9 +237,8 @@ def test(self):
for i in range(self.max_eval_steps):
if self.max_eval_steps < self.log_interval or i % self.log_interval == 0:
- self.logger.info('Test iter: [%d/%d]' %
- (i * self.world_size,
- self.max_eval_steps * self.world_size))
+ self.logger.info('Test iter: [%d/%d]' % (
+ i * self.world_size, self.max_eval_steps * self.world_size))
data = next(iter_loader)
self.model.setup_input(data)
@@ -248,8 +249,8 @@ def test(self):
current_paths = self.model.get_image_paths()
current_visuals = self.model.get_current_visuals()
- if len(current_visuals) > 0 and list(
- current_visuals.values())[0].shape == 4:
+ if len(current_visuals) > 0 and list(current_visuals.values())[
+ 0].shape == 4:
num_samples = list(current_visuals.values())[0].shape[0]
else:
num_samples = 1
@@ -267,10 +268,11 @@ def test(self):
else:
visual_results.update({name: img_tensor})
- self.visual('visual_test',
- visual_results=visual_results,
- step=self.batch_id,
- is_save_image=True)
+ self.visual(
+ 'visual_test',
+ visual_results=visual_results,
+ step=self.batch_id,
+ is_save_image=True)
if self.metrics:
for metric_name, metric in self.metrics.items():
@@ -398,9 +400,9 @@ def save(self, epoch, name='checkpoint', keep=1):
try:
if self.by_epoch:
checkpoint_name_to_be_removed = os.path.join(
- self.output_dir, 'epoch_%s_%s.pdparams' %
- ((epoch - keep * self.weight_interval) //
- self.iters_per_epoch, name))
+ self.output_dir, 'epoch_%s_%s.pdparams' % (
+ (epoch - keep * self.weight_interval) //
+ self.iters_per_epoch, name))
else:
checkpoint_name_to_be_removed = os.path.join(
self.output_dir, 'iter_%s_%s.pdparams' %
@@ -432,8 +434,8 @@ def load(self, weight_path):
for net_name, net in self.model.nets.items():
if net_name in state_dicts:
net.set_state_dict(state_dicts[net_name])
- self.logger.info(
- 'Loaded pretrained weight for net {}'.format(net_name))
+ self.logger.info('Loaded pretrained weight for net {}'.format(
+ net_name))
else:
self.logger.warning(
'Can not find state dict of net {}. Skip load pretrained weight for net {}'
diff --git a/paddlers/models/ppgan/faceutils/dlibutils/face_align.py b/paddlers/models/ppgan/faceutils/dlibutils/face_align.py
index f18c5d9f..a03a4bda 100644
--- a/paddlers/models/ppgan/faceutils/dlibutils/face_align.py
+++ b/paddlers/models/ppgan/faceutils/dlibutils/face_align.py
@@ -56,7 +56,8 @@ def align(image, lms):
# rotation angle
left_eye_corner = lms[36]
right_eye_corner = lms[45]
- radian = np.arctan((left_eye_corner[1] - right_eye_corner[1]) / (left_eye_corner[0] - right_eye_corner[0]))
+ radian = np.arctan((left_eye_corner[1] - right_eye_corner[1]) /
+ (left_eye_corner[0] - right_eye_corner[0]))
# image size after rotating
height, width, _ = image.shape
@@ -73,7 +74,8 @@ def align(image, lms):
M = np.array([[cos, sin, (1 - cos) * width / 2. - sin * height / 2. + Tx],
[-sin, cos, sin * width / 2. + (1 - cos) * height / 2. + Ty]])
- image_rotate = cv2.warpAffine(image, M, (new_w, new_h), borderValue=(255, 255, 255))
+ image_rotate = cv2.warpAffine(
+ image, M, (new_w, new_h), borderValue=(255, 255, 255))
landmarks = np.concatenate([lms, np.ones((lms.shape[0], 1))], axis=1)
landmarks_rotate = np.dot(M, landmarks.T).T
@@ -99,7 +101,8 @@ def crop(image, lms):
top -= ((right - left) - (bottom - top)) // 2
bottom = top + (right - left)
- image_crop = np.ones((bottom - top + 1, right - left + 1, 3), np.uint8) * 255
+ image_crop = np.ones((bottom - top + 1, right - left + 1, 3),
+ np.uint8) * 255
h, w = image.shape[:2]
left_white = max(0, -left)
@@ -111,5 +114,6 @@ def crop(image, lms):
bottom = min(bottom, h - 1)
bottom_white = top_white + (bottom - top)
- image_crop[top_white:bottom_white+1, left_white:right_white+1] = image[top:bottom+1, left:right+1].copy()
+ image_crop[top_white:bottom_white + 1, left_white:right_white + 1] = image[
+ top:bottom + 1, left:right + 1].copy()
return image_crop
diff --git a/paddlers/models/ppgan/faceutils/face_detection/detection/blazeface/detect.py b/paddlers/models/ppgan/faceutils/face_detection/detection/blazeface/detect.py
index f4f2b89d..b2fc020e 100644
--- a/paddlers/models/ppgan/faceutils/face_detection/detection/blazeface/detect.py
+++ b/paddlers/models/ppgan/faceutils/face_detection/detection/blazeface/detect.py
@@ -65,7 +65,8 @@ def batch_detect(net, img_batch):
ymax = pred[:, 2:3]
locs = np.concatenate((xmin, ymin, xmax, ymax), axis=1)
bboxlists.append(
- np.concatenate((locs * orig_size + shift, scores), axis=1))
+ np.concatenate(
+ (locs * orig_size + shift, scores), axis=1))
return bboxlists
diff --git a/paddlers/models/ppgan/faceutils/face_detection/detection/blazeface/net_blazeface.py b/paddlers/models/ppgan/faceutils/face_detection/detection/blazeface/net_blazeface.py
index 9e182708..fcdb4856 100644
--- a/paddlers/models/ppgan/faceutils/face_detection/detection/blazeface/net_blazeface.py
+++ b/paddlers/models/ppgan/faceutils/face_detection/detection/blazeface/net_blazeface.py
@@ -22,18 +22,19 @@ def __init__(self, in_channels, out_channels, kernel_size=3, stride=1):
padding = (kernel_size - 1) // 2
self.convs = nn.Sequential(
- nn.Conv2D(in_channels=in_channels,
- out_channels=in_channels,
- kernel_size=kernel_size,
- stride=stride,
- padding=padding,
- groups=in_channels),
- nn.Conv2D(in_channels=in_channels,
- out_channels=out_channels,
- kernel_size=1,
- stride=1,
- padding=0),
- )
+ nn.Conv2D(
+ in_channels=in_channels,
+ out_channels=in_channels,
+ kernel_size=kernel_size,
+ stride=stride,
+ padding=padding,
+ groups=in_channels),
+ nn.Conv2D(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ kernel_size=1,
+ stride=1,
+ padding=0), )
self.act = nn.ReLU()
@@ -52,6 +53,7 @@ def forward(self, x):
class BlazeFace(nn.Layer):
"""The BlazeFace face detection model.
"""
+
def __init__(self):
super(BlazeFace, self).__init__()
@@ -70,32 +72,34 @@ def __init__(self):
def _define_layers(self):
self.backbone1 = nn.Sequential(
- nn.Conv2D(in_channels=3,
- out_channels=24,
- kernel_size=5,
- stride=2,
- padding=0),
+ nn.Conv2D(
+ in_channels=3,
+ out_channels=24,
+ kernel_size=5,
+ stride=2,
+ padding=0),
nn.ReLU(),
BlazeBlock(24, 24),
BlazeBlock(24, 28),
- BlazeBlock(28, 32, stride=2),
+ BlazeBlock(
+ 28, 32, stride=2),
BlazeBlock(32, 36),
BlazeBlock(36, 42),
- BlazeBlock(42, 48, stride=2),
+ BlazeBlock(
+ 42, 48, stride=2),
BlazeBlock(48, 56),
BlazeBlock(56, 64),
BlazeBlock(64, 72),
BlazeBlock(72, 80),
- BlazeBlock(80, 88),
- )
+ BlazeBlock(80, 88), )
self.backbone2 = nn.Sequential(
- BlazeBlock(88, 96, stride=2),
- BlazeBlock(96, 96),
+ BlazeBlock(
+ 88, 96, stride=2),
BlazeBlock(96, 96),
BlazeBlock(96, 96),
BlazeBlock(96, 96),
- )
+ BlazeBlock(96, 96), )
self.classifier_8 = nn.Conv2D(88, 2, 1)
self.classifier_16 = nn.Conv2D(96, 6, 1)
@@ -240,8 +244,8 @@ def _tensors_to_detections(self, raw_box_tensor, raw_score_tensor, anchors):
output_detections = []
for i in range(raw_box_tensor.shape[0]):
boxes = paddle.to_tensor(detection_boxes[i, mask[i]])
- scores = paddle.to_tensor(
- detection_scores[i, mask[i]]).unsqueeze(axis=-1)
+ scores = paddle.to_tensor(detection_scores[i, mask[i]]).unsqueeze(
+ axis=-1)
output_detections.append(paddle.concat((boxes, scores), axis=-1))
return output_detections
@@ -296,8 +300,8 @@ def _weighted_non_max_suppression(self, detections):
first_box = detection[:4]
other_boxes = detections[remaining, :4]
- ious = overlap_similarity(paddle.to_tensor(first_box),
- paddle.to_tensor(other_boxes))
+ ious = overlap_similarity(
+ paddle.to_tensor(first_box), paddle.to_tensor(other_boxes))
mask = ious > self.min_suppression_threshold
mask = mask.numpy()
diff --git a/paddlers/models/ppgan/faceutils/face_detection/detection/sfd/detect.py b/paddlers/models/ppgan/faceutils/face_detection/detection/sfd/detect.py
index b5493cc6..8b6a3186 100644
--- a/paddlers/models/ppgan/faceutils/face_detection/detection/sfd/detect.py
+++ b/paddlers/models/ppgan/faceutils/face_detection/detection/sfd/detect.py
@@ -95,8 +95,8 @@ def batch_detect(net, imgs):
box = batch_decode(paddle.to_tensor(loc), priors, variances)
box = box[:, 0] * 1.0
bboxlist.append(
- paddle.concat([box, paddle.to_tensor(score).unsqueeze(1)],
- 1).numpy())
+ paddle.concat([box, paddle.to_tensor(score).unsqueeze(1)], 1)
+ .numpy())
bboxlist = np.array(bboxlist)
if 0 == len(bboxlist):
bboxlist = np.zeros((1, BB, 5))
diff --git a/paddlers/models/ppgan/faceutils/face_detection/detection/sfd/net_s3fd.py b/paddlers/models/ppgan/faceutils/face_detection/detection/sfd/net_s3fd.py
index aa5a7db7..3fcd4ad3 100644
--- a/paddlers/models/ppgan/faceutils/face_detection/detection/sfd/net_s3fd.py
+++ b/paddlers/models/ppgan/faceutils/face_detection/detection/sfd/net_s3fd.py
@@ -23,8 +23,8 @@ def __init__(self, n_channels, scale=1.0):
self.n_channels = n_channels
self.scale = scale
self.eps = 1e-10
- self.weight = paddle.create_parameter(shape=[self.n_channels],
- dtype='float32')
+ self.weight = paddle.create_parameter(
+ shape=[self.n_channels], dtype='float32')
self.weight.set_value(paddle.zeros([self.n_channels]) + self.scale)
def forward(self, x):
@@ -67,67 +67,31 @@ def __init__(self):
self.conv4_3_norm = L2Norm(512, scale=8)
self.conv5_3_norm = L2Norm(512, scale=5)
- self.conv3_3_norm_mbox_conf = nn.Conv2D(256,
- 4,
- kernel_size=3,
- stride=1,
- padding=1)
- self.conv3_3_norm_mbox_loc = nn.Conv2D(256,
- 4,
- kernel_size=3,
- stride=1,
- padding=1)
- self.conv4_3_norm_mbox_conf = nn.Conv2D(512,
- 2,
- kernel_size=3,
- stride=1,
- padding=1)
- self.conv4_3_norm_mbox_loc = nn.Conv2D(512,
- 4,
- kernel_size=3,
- stride=1,
- padding=1)
- self.conv5_3_norm_mbox_conf = nn.Conv2D(512,
- 2,
- kernel_size=3,
- stride=1,
- padding=1)
- self.conv5_3_norm_mbox_loc = nn.Conv2D(512,
- 4,
- kernel_size=3,
- stride=1,
- padding=1)
-
- self.fc7_mbox_conf = nn.Conv2D(1024,
- 2,
- kernel_size=3,
- stride=1,
- padding=1)
- self.fc7_mbox_loc = nn.Conv2D(1024,
- 4,
- kernel_size=3,
- stride=1,
- padding=1)
- self.conv6_2_mbox_conf = nn.Conv2D(512,
- 2,
- kernel_size=3,
- stride=1,
- padding=1)
- self.conv6_2_mbox_loc = nn.Conv2D(512,
- 4,
- kernel_size=3,
- stride=1,
- padding=1)
- self.conv7_2_mbox_conf = nn.Conv2D(256,
- 2,
- kernel_size=3,
- stride=1,
- padding=1)
- self.conv7_2_mbox_loc = nn.Conv2D(256,
- 4,
- kernel_size=3,
- stride=1,
- padding=1)
+ self.conv3_3_norm_mbox_conf = nn.Conv2D(
+ 256, 4, kernel_size=3, stride=1, padding=1)
+ self.conv3_3_norm_mbox_loc = nn.Conv2D(
+ 256, 4, kernel_size=3, stride=1, padding=1)
+ self.conv4_3_norm_mbox_conf = nn.Conv2D(
+ 512, 2, kernel_size=3, stride=1, padding=1)
+ self.conv4_3_norm_mbox_loc = nn.Conv2D(
+ 512, 4, kernel_size=3, stride=1, padding=1)
+ self.conv5_3_norm_mbox_conf = nn.Conv2D(
+ 512, 2, kernel_size=3, stride=1, padding=1)
+ self.conv5_3_norm_mbox_loc = nn.Conv2D(
+ 512, 4, kernel_size=3, stride=1, padding=1)
+
+ self.fc7_mbox_conf = nn.Conv2D(
+ 1024, 2, kernel_size=3, stride=1, padding=1)
+ self.fc7_mbox_loc = nn.Conv2D(
+ 1024, 4, kernel_size=3, stride=1, padding=1)
+ self.conv6_2_mbox_conf = nn.Conv2D(
+ 512, 2, kernel_size=3, stride=1, padding=1)
+ self.conv6_2_mbox_loc = nn.Conv2D(
+ 512, 4, kernel_size=3, stride=1, padding=1)
+ self.conv7_2_mbox_conf = nn.Conv2D(
+ 256, 2, kernel_size=3, stride=1, padding=1)
+ self.conv7_2_mbox_loc = nn.Conv2D(
+ 256, 4, kernel_size=3, stride=1, padding=1)
def forward(self, x):
h = F.relu(self.conv1_1(x))
diff --git a/paddlers/models/ppgan/faceutils/face_detection/utils.py b/paddlers/models/ppgan/faceutils/face_detection/utils.py
index 6590f966..b7a0b5b2 100644
--- a/paddlers/models/ppgan/faceutils/face_detection/utils.py
+++ b/paddlers/models/ppgan/faceutils/face_detection/utils.py
@@ -55,8 +55,8 @@ def crop(image, center, scale, resolution=256.0):
br = transform([resolution, resolution], center, scale, resolution, True)
br = br.numpy()
if image.ndim > 2:
- newDim = np.array([br[1] - ul[1], br[0] - ul[0], image.shape[2]],
- dtype=np.int32)
+ newDim = np.array(
+ [br[1] - ul[1], br[0] - ul[0], image.shape[2]], dtype=np.int32)
newImg = np.zeros(newDim, dtype=np.uint8)
else:
newDim = np.array([br[1] - ul[1], br[0] - ul[0]], dtype=np.int)
@@ -69,10 +69,10 @@ def crop(image, center, scale, resolution=256.0):
[max(1, -ul[1] + 1), min(br[1], ht) - ul[1]], dtype=np.int32)
oldX = np.array([max(1, ul[0] + 1), min(br[0], wd)], dtype=np.int32)
oldY = np.array([max(1, ul[1] + 1), min(br[1], ht)], dtype=np.int32)
- newImg[newY[0] - 1:newY[1],
- newX[0] - 1:newX[1]] = image[oldY[0] - 1:oldY[1],
- oldX[0] - 1:oldX[1], :]
- newImg = cv2.resize(newImg,
- dsize=(int(resolution), int(resolution)),
- interpolation=cv2.INTER_LINEAR)
+ newImg[newY[0] - 1:newY[1], newX[0] - 1:newX[1]] = image[oldY[0] - 1:oldY[
+ 1], oldX[0] - 1:oldX[1], :]
+ newImg = cv2.resize(
+ newImg,
+ dsize=(int(resolution), int(resolution)),
+ interpolation=cv2.INTER_LINEAR)
return newImg
diff --git a/paddlers/models/ppgan/faceutils/face_enhancement/face_enhance.py b/paddlers/models/ppgan/faceutils/face_enhancement/face_enhance.py
index 055fc0ba..038feb9e 100644
--- a/paddlers/models/ppgan/faceutils/face_enhancement/face_enhance.py
+++ b/paddlers/models/ppgan/faceutils/face_enhancement/face_enhance.py
@@ -25,11 +25,7 @@
class FaceEnhancement(object):
- def __init__(self,
- path_to_enhance=None,
- size = 512,
- batch_size=1
- ):
+ def __init__(self, path_to_enhance=None, size=512, batch_size=1):
super(FaceEnhancement, self).__init__()
# Initialise the face detector
@@ -38,17 +34,19 @@ def __init__(self,
model_weights = paddle.load(model_weights_path)
else:
model_weights = paddle.load(path_to_enhance)
-
+
self.face_enhance = GPEN(size=512, style_dim=512, n_mlp=8)
self.face_enhance.load_dict(model_weights)
self.face_enhance.eval()
self.size = size
self.mask = np.zeros((512, 512), np.float32)
- cv2.rectangle(self.mask, (26, 26), (486, 486), (1, 1, 1), -1, cv2.LINE_AA)
+ cv2.rectangle(self.mask, (26, 26), (486, 486), (1, 1, 1), -1,
+ cv2.LINE_AA)
self.mask = cv2.GaussianBlur(self.mask, (101, 101), 11)
self.mask = cv2.GaussianBlur(self.mask, (101, 101), 11)
- self.mask = paddle.tile(paddle.to_tensor(self.mask).unsqueeze(0).unsqueeze(-1), repeat_times=[batch_size,1,1,3]).numpy()
-
+ self.mask = paddle.tile(
+ paddle.to_tensor(self.mask).unsqueeze(0).unsqueeze(-1),
+ repeat_times=[batch_size, 1, 1, 3]).numpy()
def enhance_from_image(self, img):
if isinstance(img, np.ndarray):
@@ -65,14 +63,14 @@ def enhance_from_batch(self, img):
else:
assert img.shape[1:] == [3, 512, 512]
img_ori = img.transpose([0, 2, 3, 1]).numpy()
- img_t = (img/255. - 0.5) / 0.5
-
+ img_t = (img / 255. - 0.5) / 0.5
+
with paddle.no_grad():
out, __ = self.face_enhance(img_t)
-
+
image_tensor = out * 0.5 + 0.5
- image_tensor = image_tensor.transpose([0, 2, 3, 1]) # RGB
+ image_tensor = image_tensor.transpose([0, 2, 3, 1]) # RGB
image_numpy = paddle.clip(image_tensor, 0, 1) * 255.0
-
+
out = image_numpy.astype(np.uint8).cpu().numpy()
- return out * self.mask + (1-self.mask) * img_ori
+ return out * self.mask + (1 - self.mask) * img_ori
diff --git a/paddlers/models/ppgan/faceutils/face_segmentation/face_seg.py b/paddlers/models/ppgan/faceutils/face_segmentation/face_seg.py
index 113e80b2..99d3e2ee 100644
--- a/paddlers/models/ppgan/faceutils/face_segmentation/face_seg.py
+++ b/paddlers/models/ppgan/faceutils/face_segmentation/face_seg.py
@@ -21,13 +21,13 @@
from .fcn import FCN
from .hrnet import HRNet_W18
-
BISENET_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/models/faceseg_FCN-HRNetW18.pdparams'
class FaceSeg:
def __init__(self):
- save_pth = get_path_from_url(BISENET_WEIGHT_URL, osp.split(osp.realpath(__file__))[0])
+ save_pth = get_path_from_url(BISENET_WEIGHT_URL,
+ osp.split(osp.realpath(__file__))[0])
self.net = FCN(num_classes=2, backbone=HRNet_W18())
state_dict = paddle.load(save_pth)
@@ -47,7 +47,8 @@ def __call__(self, image):
return mask
def input_transform(self, image):
- image_input = cv2.resize(image, (384, 384), interpolation=cv2.INTER_AREA)
+ image_input = cv2.resize(
+ image, (384, 384), interpolation=cv2.INTER_AREA)
image_input = (image_input / 255.)[np.newaxis, :, :, :]
image_input = np.transpose(image_input, (0, 3, 1, 2)).astype(np.float32)
image_input = paddle.to_tensor(image_input)
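
`input_transform` is the usual preprocessing triple: resize to the network's 384x384 input, scale to [0, 1], and reorder NHWC to NCHW. The same steps in plain NumPy/OpenCV, mirroring the code above:

import cv2
import numpy as np

def input_transform(image):
    x = cv2.resize(image, (384, 384), interpolation=cv2.INTER_AREA)
    x = (x / 255.)[np.newaxis, :, :, :]                       # add batch axis
    return np.transpose(x, (0, 3, 1, 2)).astype(np.float32)  # NHWC -> NCHW
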
diff --git a/paddlers/models/ppgan/metrics/lpips.py b/paddlers/models/ppgan/metrics/lpips.py
index b9890963..65efd6c2 100644
--- a/paddlers/models/ppgan/metrics/lpips.py
+++ b/paddlers/models/ppgan/metrics/lpips.py
@@ -45,6 +45,7 @@ class LPIPSMetric(paddle.metric.Metric):
Returns:
float: lpips result.
"""
+
def __init__(self, net='vgg', version='0.1', mean=None, std=None):
self.net = net
self.version = version
@@ -76,10 +77,10 @@ def update(self, preds, gts):
for pred, gt in zip(preds, gts):
pred, gt = pred.astype(np.float32) / 255., gt.astype(
np.float32) / 255.
- pred = paddle.vision.transforms.normalize(pred.transpose([2, 0, 1]),
- self.mean, self.std)
- gt = paddle.vision.transforms.normalize(gt.transpose([2, 0, 1]),
- self.mean, self.std)
+ pred = paddle.vision.transforms.normalize(
+ pred.transpose([2, 0, 1]), self.mean, self.std)
+ gt = paddle.vision.transforms.normalize(
+ gt.transpose([2, 0, 1]), self.mean, self.std)
with paddle.no_grad():
value = self.loss_fn(
@@ -110,11 +111,13 @@ def spatial_average(in_tens, keepdim=True):
# assumes the scale factor is the same for H and W
def upsample(in_tens, out_HW=(64, 64)):
in_H, in_W = in_tens.shape[2], in_tens.shape[3]
- scale_factor_H, scale_factor_W = 1. * out_HW[0] / in_H, 1. * out_HW[1] / in_W
+ scale_factor_H, scale_factor_W = 1. * out_HW[0] / in_H, 1. * out_HW[
+ 1] / in_W
- return nn.Upsample(scale_factor=(scale_factor_H, scale_factor_W),
- mode='bilinear',
- align_corners=False)(in_tens)
+ return nn.Upsample(
+ scale_factor=(scale_factor_H, scale_factor_W),
+ mode='bilinear',
+ align_corners=False)(in_tens)
def normalize_tensor(in_feat, eps=1e-10):
@@ -143,8 +146,8 @@ def __init__(self,
if (verbose):
print(
'Setting up [%s] perceptual loss: trunk [%s], v[%s], spatial [%s]'
- % ('LPIPS' if lpips else 'baseline', net, version,
- 'on' if spatial else 'off'))
+ % ('LPIPS' if lpips else 'baseline', net, version, 'on'
+ if spatial else 'off'))
self.pnet_type = net
self.pnet_tune = pnet_tune
@@ -207,31 +210,35 @@ def forward(self, in0, in1, retPerLayer=False, normalize=False):
feats0, feats1, diffs = {}, {}, {}
for kk in range(self.L):
- feats0[kk], feats1[kk] = normalize_tensor(
- outs0[kk]), normalize_tensor(outs1[kk])
+ feats0[kk], feats1[kk] = normalize_tensor(outs0[
+ kk]), normalize_tensor(outs1[kk])
diffs[kk] = (feats0[kk] - feats1[kk])**2
if (self.lpips):
if (self.spatial):
res = [
- upsample(self.lins[kk].model(diffs[kk]),
- out_HW=in0.shape[2:]) for kk in range(self.L)
+ upsample(
+ self.lins[kk].model(diffs[kk]), out_HW=in0.shape[2:])
+ for kk in range(self.L)
]
else:
res = [
- spatial_average(self.lins[kk].model(diffs[kk]),
- keepdim=True) for kk in range(self.L)
+ spatial_average(
+ self.lins[kk].model(diffs[kk]), keepdim=True)
+ for kk in range(self.L)
]
else:
if (self.spatial):
res = [
- upsample(diffs[kk].sum(dim=1, keepdim=True),
- out_HW=in0.shape[2:]) for kk in range(self.L)
+ upsample(
+ diffs[kk].sum(dim=1, keepdim=True),
+ out_HW=in0.shape[2:]) for kk in range(self.L)
]
else:
res = [
- spatial_average(diffs[kk].sum(dim=1, keepdim=True),
- keepdim=True) for kk in range(self.L)
+ spatial_average(
+ diffs[kk].sum(dim=1, keepdim=True), keepdim=True)
+ for kk in range(self.L)
]
val = res[0]
@@ -251,8 +258,7 @@ def __init__(self):
'shift',
paddle.to_tensor([-.030, -.088, -.188]).reshape([1, 3, 1, 1]))
self.register_buffer(
- 'scale',
- paddle.to_tensor([.458, .448, .450]).reshape([1, 3, 1, 1]))
+ 'scale', paddle.to_tensor([.458, .448, .450]).reshape([1, 3, 1, 1]))
def forward(self, inp):
return (inp - self.shift) / self.scale
@@ -260,14 +266,14 @@ def forward(self, inp):
class NetLinLayer(nn.Layer):
''' A single linear layer which does a 1x1 conv '''
+
def __init__(self, chn_in, chn_out=1, use_dropout=False):
super(NetLinLayer, self).__init__()
- layers = [
- nn.Dropout(),
- ] if (use_dropout) else []
+ layers = [nn.Dropout(), ] if (use_dropout) else []
layers += [
- nn.Conv2D(chn_in, chn_out, 1, stride=1, padding=0, bias_attr=False),
+ nn.Conv2D(
+ chn_in, chn_out, 1, stride=1, padding=0, bias_attr=False),
]
self.model = nn.Sequential(*layers)
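
LPIPS reduces each backbone stage to a scalar distance: unit-normalize the two feature maps along channels, square their difference, collapse channels with the learned 1x1 convolution that `NetLinLayer` wraps, then spatially average. A NumPy sketch of one stage, with a plain weight vector standing in for the 1x1 conv (an assumption for brevity):

import numpy as np

def lpips_stage_distance(feat0, feat1, lin_weight, eps=1e-10):
    # feat0/feat1: (N, C, H, W) activations; lin_weight: (C,) learned weights.
    f0 = feat0 / (np.sqrt((feat0 ** 2).sum(1, keepdims=True)) + eps)
    f1 = feat1 / (np.sqrt((feat1 ** 2).sum(1, keepdims=True)) + eps)
    diff = (f0 - f1) ** 2
    weighted = (diff * lin_weight[None, :, None, None]).sum(1)  # 1x1 conv
    return weighted.mean(axis=(1, 2))  # spatial average, one score per image

The full metric sums these per-stage scores over all stages, as in `forward` above.
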
diff --git a/paddlers/models/ppgan/metrics/psnr_ssim.py b/paddlers/models/ppgan/metrics/psnr_ssim.py
index 7ed288a2..af19c292 100644
--- a/paddlers/models/ppgan/metrics/psnr_ssim.py
+++ b/paddlers/models/ppgan/metrics/psnr_ssim.py
@@ -170,9 +170,8 @@ def _ssim(img1, img2):
sigma2_sq = cv2.filter2D(img2**2, -1, window)[5:-5, 5:-5] - mu2_sq
sigma12 = cv2.filter2D(img1 * img2, -1, window)[5:-5, 5:-5] - mu1_mu2
- ssim_map = ((2 * mu1_mu2 + C1) *
- (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) *
- (sigma1_sq + sigma2_sq + C2))
+ ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / (
+ (mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2))
return ssim_map.mean()
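
The reformatted expression is the standard SSIM map, ((2*mu1*mu2 + C1)(2*sigma12 + C2)) / ((mu1^2 + mu2^2 + C1)(sigma1^2 + sigma2^2 + C2)), averaged over valid pixels. A self-contained NumPy sketch using a single global window instead of the 11x11 Gaussian window used above (a simplification, not the production code):

import numpy as np

def ssim_global(img1, img2, L=255.0):
    C1, C2 = (0.01 * L) ** 2, (0.03 * L) ** 2   # standard SSIM constants
    img1, img2 = img1.astype(np.float64), img2.astype(np.float64)
    mu1, mu2 = img1.mean(), img2.mean()
    cov = ((img1 - mu1) * (img2 - mu2)).mean()
    return ((2 * mu1 * mu2 + C1) * (2 * cov + C2)) / (
        (mu1 ** 2 + mu2 ** 2 + C1) * (img1.var() + img2.var() + C2))
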
diff --git a/paddlers/models/ppgan/models/animeganv2_model.py b/paddlers/models/ppgan/models/animeganv2_model.py
index c2ee5de2..71c3e8f3 100644
--- a/paddlers/models/ppgan/models/animeganv2_model.py
+++ b/paddlers/models/ppgan/models/animeganv2_model.py
@@ -29,6 +29,7 @@
class AnimeGANV2Model(BaseModel):
""" This class implements the AnimeGANV2 model.
"""
+
def __init__(self,
generator,
discriminator=None,
@@ -126,10 +127,11 @@ def con_sty_loss(self, real, anime, fake):
@staticmethod
def rgb2yuv(rgb):
- kernel = paddle.to_tensor([[0.299, -0.14714119, 0.61497538],
- [0.587, -0.28886916, -0.51496512],
- [0.114, 0.43601035, -0.10001026]],
- dtype='float32')
+ kernel = paddle.to_tensor(
+ [[0.299, -0.14714119, 0.61497538],
+ [0.587, -0.28886916, -0.51496512],
+ [0.114, 0.43601035, -0.10001026]],
+ dtype='float32')
rgb = paddle.transpose(rgb, (0, 2, 3, 1))
yuv = paddle.matmul(rgb, kernel)
return yuv
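
`rgb2yuv` moves channels last and right-multiplies every pixel's (R, G, B) vector by the 3x3 kernel, yielding (Y, U, V). The equivalent NumPy computation, with the kernel values copied from the code above:

import numpy as np

RGB2YUV = np.array([[0.299, -0.14714119, 0.61497538],
                    [0.587, -0.28886916, -0.51496512],
                    [0.114, 0.43601035, -0.10001026]], dtype=np.float32)

def rgb2yuv(rgb_nchw):
    rgb = np.transpose(rgb_nchw, (0, 2, 3, 1))  # (N, 3, H, W) -> (N, H, W, 3)
    return rgb @ RGB2YUV                        # per-pixel matrix multiply
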
diff --git a/paddlers/models/ppgan/models/base_model.py b/paddlers/models/ppgan/models/base_model.py
index ae4ecd2b..a20d77af 100644
--- a/paddlers/models/ppgan/models/base_model.py
+++ b/paddlers/models/ppgan/models/base_model.py
@@ -49,6 +49,7 @@ class BaseModel(ABC):
# save checkpoint (model.nets) \/
"""
+
def __init__(self, params=None):
"""Initialize the BaseModel class.
@@ -126,8 +127,8 @@ def setup_optimizers(self, lr, cfg):
parameters = []
for net_name in net_names:
parameters += self.nets[net_name].parameters()
- self.optimizers[opt_name] = build_optimizer(
- cfg_, lr, parameters)
+ self.optimizers[opt_name] = build_optimizer(cfg_, lr,
+ parameters)
return self.optimizers
@@ -187,17 +188,15 @@ def export_model(self, export_model, output_dir=None, inputs_size=[]):
inputs_num = 0
for net in export_model:
input_spec = [
- paddle.static.InputSpec(shape=inputs_size[inputs_num + i],
- dtype="float32")
+ paddle.static.InputSpec(
+ shape=inputs_size[inputs_num + i], dtype="float32")
for i in range(net["inputs_num"])
]
inputs_num = inputs_num + net["inputs_num"]
- static_model = paddle.jit.to_static(self.nets[net["name"]],
- input_spec=input_spec)
+ static_model = paddle.jit.to_static(
+ self.nets[net["name"]], input_spec=input_spec)
if output_dir is None:
output_dir = 'inference_model'
- paddle.jit.save(
- static_model,
- os.path.join(
- output_dir, '{}_{}'.format(self.__class__.__name__.lower(),
- net["name"])))
+ paddle.jit.save(static_model,
+ os.path.join(output_dir, '{}_{}'.format(
+ self.__class__.__name__.lower(), net["name"])))
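
`export_model` pins each sub-network's input shapes with `InputSpec`, traces it to a static graph with `paddle.jit.to_static`, and serializes it for inference. A minimal usage sketch of the same API calls (the layer and shape are placeholders):

import os
import paddle

net = paddle.nn.Conv2D(3, 8, 3)  # stand-in for self.nets[net["name"]]
spec = [paddle.static.InputSpec(shape=[1, 3, 256, 256], dtype="float32")]
static_net = paddle.jit.to_static(net, input_spec=spec)
paddle.jit.save(static_net, os.path.join("inference_model", "demo_net"))
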
diff --git a/paddlers/models/ppgan/models/basicvsr_model.py b/paddlers/models/ppgan/models/basicvsr_model.py
index 54a9b545..f9afec44 100644
--- a/paddlers/models/ppgan/models/basicvsr_model.py
+++ b/paddlers/models/ppgan/models/basicvsr_model.py
@@ -29,6 +29,7 @@ class BasicVSRModel(BaseSRModel):
Paper: BasicVSR: The Search for Essential Components in Video Super-Resolution and Beyond, CVPR, 2021
"""
+
def __init__(self, generator, fix_iter, lr_mult, pixel_criterion=None):
"""Initialize the BasicVSR class.
diff --git a/paddlers/models/ppgan/models/criterions/gan_loss.py b/paddlers/models/ppgan/models/criterions/gan_loss.py
index d3fbcda4..6cc1ccbb 100644
--- a/paddlers/models/ppgan/models/criterions/gan_loss.py
+++ b/paddlers/models/ppgan/models/criterions/gan_loss.py
@@ -27,6 +27,7 @@ class GANLoss(nn.Layer):
The GANLoss class abstracts away the need to create the target label tensor
that has the same size as the input.
"""
+
def __init__(self,
gan_mode,
target_real_label=1.0,
diff --git a/paddlers/models/ppgan/models/criterions/photopen_perceptual_loss.py b/paddlers/models/ppgan/models/criterions/photopen_perceptual_loss.py
index f90ed465..d604a979 100644
--- a/paddlers/models/ppgan/models/criterions/photopen_perceptual_loss.py
+++ b/paddlers/models/ppgan/models/criterions/photopen_perceptual_loss.py
@@ -9,6 +9,7 @@
from ppgan.utils.download import get_path_from_url
from .builder import CRITERIONS
+
class ConvBlock(nn.Layer):
def __init__(self, input_channels, output_channels, groups, name=None):
super(ConvBlock, self).__init__()
@@ -63,6 +64,7 @@ def forward(self, inputs):
x = self._pool(x)
return x
+
class VGG19(nn.Layer):
def __init__(self, layers=19, class_dim=1000):
super(VGG19, self).__init__()
@@ -88,13 +90,13 @@ def __init__(self, layers=19, class_dim=1000):
self._drop = Dropout(p=0.5, mode="downscale_in_infer")
self._fc1 = Linear(
7 * 7 * 512,
- 4096,)
+ 4096, )
self._fc2 = Linear(
4096,
- 4096,)
+ 4096, )
self._out = Linear(
4096,
- class_dim,)
+ class_dim, )
def forward(self, inputs):
features = []
@@ -119,14 +121,16 @@ def forward(self, inputs):
x = self._out(x)
return x, features
+
@CRITERIONS.register()
class PhotoPenPerceptualLoss(nn.Layer):
- def __init__(self,
- crop_size,
- lambda_vgg,
-# pretrained='test/vgg19pretrain.pdparams',
- pretrained='https://paddlegan.bj.bcebos.com/models/vgg19pretrain.pdparams',
- ):
+ def __init__(
+ self,
+ crop_size,
+ lambda_vgg,
+ # pretrained='test/vgg19pretrain.pdparams',
+ pretrained='https://paddlegan.bj.bcebos.com/models/vgg19pretrain.pdparams',
+ ):
super(PhotoPenPerceptualLoss, self).__init__()
self.model = VGG19()
weight_path = get_path_from_url(pretrained)
@@ -136,7 +140,7 @@ def __init__(self,
self.rates = [1.0 / 32, 1.0 / 16, 1.0 / 8, 1.0 / 4, 1.0]
self.crop_size = crop_size
self.lambda_vgg = lambda_vgg
-
+
def forward(self, img_r, img_f):
img_r = F.interpolate(img_r, (self.crop_size, self.crop_size))
img_f = F.interpolate(img_f, (self.crop_size, self.crop_size))
@@ -146,5 +150,5 @@ def forward(self, img_r, img_f):
for i in range(len(feat_r)):
g_vggloss += self.rates[i] * nn.L1Loss()(feat_r[i], feat_f[i])
g_vggloss *= self.lambda_vgg
-
+
return g_vggloss
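
The forward pass above is a weighted sum of per-stage L1 distances between VGG19 features of the real and fake images; `rates` grows from 1/32 to 1, so deeper stages contribute more. A sketch of that reduction, assuming matching lists of feature tensors:

import paddle
import paddle.nn as nn

def weighted_feature_l1(feats_real, feats_fake, rates, lambda_vgg):
    # feats_*: per-stage feature tensors from the same VGG19 backbone.
    loss = paddle.zeros([1])
    l1 = nn.L1Loss()
    for rate, fr, ff in zip(rates, feats_real, feats_fake):
        loss += rate * l1(fr, ff)
    return loss * lambda_vgg
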
diff --git a/paddlers/models/ppgan/models/criterions/pixel_loss.py b/paddlers/models/ppgan/models/criterions/pixel_loss.py
index 6e878ad7..62c7f5dc 100644
--- a/paddlers/models/ppgan/models/criterions/pixel_loss.py
+++ b/paddlers/models/ppgan/models/criterions/pixel_loss.py
@@ -31,6 +31,7 @@ class L1Loss():
loss_weight (float): Loss weight for L1 loss. Default: 1.0.
"""
+
def __init__(self, reduction='mean', loss_weight=1.0):
        # when loss weight is less than or equal to zero, return None
if loss_weight <= 0:
@@ -59,6 +60,7 @@ class CharbonnierLoss():
eps (float): Default: 1e-12.
"""
+
def __init__(self, eps=1e-12, reduction='sum'):
self.eps = eps
self.reduction = reduction
@@ -90,6 +92,7 @@ class MSELoss():
loss_weight (float): Loss weight for MSE loss. Default: 1.0.
"""
+
def __init__(self, reduction='mean', loss_weight=1.0):
        # when loss weight is less than or equal to zero, return None
if loss_weight <= 0:
@@ -119,6 +122,7 @@ class BCEWithLogitsLoss():
Supported choices are 'none' | 'mean' | 'sum'. Default: 'mean'.
        loss_weight (float): Loss weight for BCE loss. Default: 1.0.
"""
+
def __init__(self, reduction='mean', loss_weight=1.0):
        # when loss weight is less than or equal to zero, return None
if loss_weight <= 0:
@@ -161,6 +165,7 @@ def calc_emd_loss(pred, target):
class CalcStyleEmdLoss():
"""Calc Style Emd Loss.
"""
+
def __init__(self):
super(CalcStyleEmdLoss, self).__init__()
@@ -183,6 +188,7 @@ def __call__(self, pred, target):
class CalcContentReltLoss():
"""Calc Content Relt Loss.
"""
+
def __init__(self):
super(CalcContentReltLoss, self).__init__()
@@ -207,6 +213,7 @@ def __call__(self, pred, target):
class CalcContentLoss():
"""Calc Content Loss.
"""
+
def __init__(self):
self.mse_loss = nn.MSELoss()
@@ -221,14 +228,15 @@ def __call__(self, pred, target, norm=False):
if (norm == False):
return self.mse_loss(pred, target)
else:
- return self.mse_loss(mean_variance_norm(pred),
- mean_variance_norm(target))
+ return self.mse_loss(
+ mean_variance_norm(pred), mean_variance_norm(target))
@CRITERIONS.register()
class CalcStyleLoss():
"""Calc Style Loss.
"""
+
def __init__(self):
self.mse_loss = nn.MSELoss()
@@ -241,31 +249,31 @@ def __call__(self, pred, target):
"""
pred_mean, pred_std = calc_mean_std(pred)
target_mean, target_std = calc_mean_std(target)
- return self.mse_loss(pred_mean, target_mean) + self.mse_loss(
- pred_std, target_std)
+ return self.mse_loss(pred_mean, target_mean) + self.mse_loss(pred_std,
+ target_std)
@CRITERIONS.register()
class EdgeLoss():
def __init__(self):
k = paddle.to_tensor([[.05, .25, .4, .25, .05]])
- self.kernel = paddle.matmul(k.t(),k).unsqueeze(0).tile([3,1,1,1])
+ self.kernel = paddle.matmul(k.t(), k).unsqueeze(0).tile([3, 1, 1, 1])
self.loss = CharbonnierLoss()
def conv_gauss(self, img):
n_channels, _, kw, kh = self.kernel.shape
- img = F.pad(img, [kw//2, kh//2, kw//2, kh//2], mode='replicate')
+ img = F.pad(img, [kw // 2, kh // 2, kw // 2, kh // 2], mode='replicate')
return F.conv2d(img, self.kernel, groups=n_channels)
def laplacian_kernel(self, current):
- filtered = self.conv_gauss(current) # filter
- down = filtered[:,:,::2,::2] # downsample
- new_filter = paddle.zeros_like(filtered)
- new_filter[:,:,::2,::2] = down*4 # upsample
- filtered = self.conv_gauss(new_filter) # filter
+ filtered = self.conv_gauss(current) # filter
+ down = filtered[:, :, ::2, ::2] # downsample
+ new_filter = paddle.zeros_like(filtered)
+ new_filter[:, :, ::2, ::2] = down * 4 # upsample
+ filtered = self.conv_gauss(new_filter) # filter
diff = current - filtered
return diff
def __call__(self, x, y):
loss = self.loss(self.laplacian_kernel(x), self.laplacian_kernel(y))
- return loss
\ No newline at end of file
+ return loss
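
`EdgeLoss` compares high-frequency residuals of a one-level Laplacian pyramid: blur with a 5x5 Gaussian built from the outer product of the 5-tap kernel, keep every second pixel, re-expand by zero-stuffing (scaled by 4 to preserve energy), blur again, and subtract from the input; the Charbonnier loss then compares the two residuals. A single-channel NumPy/SciPy sketch of the residual (SciPy is used here only for brevity):

import numpy as np
from scipy.ndimage import convolve

K1D = np.array([.05, .25, .4, .25, .05])
GAUSS = np.outer(K1D, K1D)  # same as paddle.matmul(k.t(), k) above

def laplacian_residual(img):
    filtered = convolve(img, GAUSS, mode='nearest')    # blur
    down = filtered[::2, ::2]                          # downsample
    up = np.zeros_like(filtered)
    up[::2, ::2] = down * 4                            # zero-stuffed upsample
    return img - convolve(up, GAUSS, mode='nearest')   # high-freq residual
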
diff --git a/paddlers/models/ppgan/models/cycle_gan_model.py b/paddlers/models/ppgan/models/cycle_gan_model.py
index 6d1c3f09..4946800c 100644
--- a/paddlers/models/ppgan/models/cycle_gan_model.py
+++ b/paddlers/models/ppgan/models/cycle_gan_model.py
@@ -31,6 +31,7 @@ class CycleGANModel(BaseModel):
CycleGAN paper: https://arxiv.org/pdf/1703.10593.pdf
"""
+
def __init__(self,
generator,
discriminator=None,
diff --git a/paddlers/models/ppgan/models/dc_gan_model.py b/paddlers/models/ppgan/models/dc_gan_model.py
index 220e05c0..787299b1 100644
--- a/paddlers/models/ppgan/models/dc_gan_model.py
+++ b/paddlers/models/ppgan/models/dc_gan_model.py
@@ -28,6 +28,7 @@ class DCGANModel(BaseModel):
This class implements the DCGAN model, for learning a distribution from input images.
DCGAN paper: https://arxiv.org/pdf/1511.06434
"""
+
def __init__(self, generator, discriminator=None, gan_criterion=None):
"""Initialize the DCGAN class.
Args:
diff --git a/paddlers/models/ppgan/models/discriminators/discriminator_animegan.py b/paddlers/models/ppgan/models/discriminators/discriminator_animegan.py
index c06ad72f..09b4b17b 100644
--- a/paddlers/models/ppgan/models/discriminators/discriminator_animegan.py
+++ b/paddlers/models/ppgan/models/discriminators/discriminator_animegan.py
@@ -11,34 +11,34 @@
@DISCRIMINATORS.register()
class AnimeDiscriminator(nn.Layer):
- def __init__(self, channel: int = 64, nblocks: int = 3) -> None:
+ def __init__(self, channel: int=64, nblocks: int=3) -> None:
super().__init__()
channel = channel // 2
last_channel = channel
f = [
spectral_norm(
- nn.Conv2D(3, channel, 3, stride=1, padding=1, bias_attr=False)),
+ nn.Conv2D(
+ 3, channel, 3, stride=1, padding=1, bias_attr=False)),
nn.LeakyReLU(0.2)
]
in_h = 256
for i in range(1, nblocks):
f.extend([
spectral_norm(
- nn.Conv2D(last_channel,
- channel * 2,
- 3,
- stride=2,
- padding=1,
- bias_attr=False)),
- nn.LeakyReLU(0.2),
- spectral_norm(
- nn.Conv2D(channel * 2,
- channel * 4,
- 3,
- stride=1,
- padding=1,
- bias_attr=False)),
- nn.GroupNorm(1, channel * 4),
+ nn.Conv2D(
+ last_channel,
+ channel * 2,
+ 3,
+ stride=2,
+ padding=1,
+ bias_attr=False)), nn.LeakyReLU(0.2), spectral_norm(
+ nn.Conv2D(
+ channel * 2,
+ channel * 4,
+ 3,
+ stride=1,
+ padding=1,
+ bias_attr=False)), nn.GroupNorm(1, channel * 4),
nn.LeakyReLU(0.2)
])
last_channel = channel * 4
@@ -49,15 +49,14 @@ def __init__(self, channel: int = 64, nblocks: int = 3) -> None:
self.head = nn.Sequential(*[
spectral_norm(
- nn.Conv2D(last_channel,
- channel * 2,
- 3,
- stride=1,
- padding=1,
- bias_attr=False)),
- nn.GroupNorm(1, channel * 2),
- nn.LeakyReLU(0.2),
- spectral_norm(
+ nn.Conv2D(
+ last_channel,
+ channel * 2,
+ 3,
+ stride=1,
+ padding=1,
+ bias_attr=False)), nn.GroupNorm(1, channel * 2),
+ nn.LeakyReLU(0.2), spectral_norm(
nn.Conv2D(
channel * 2, 1, 3, stride=1, padding=1, bias_attr=False))
])
diff --git a/paddlers/models/ppgan/models/discriminators/discriminator_firstorder.py b/paddlers/models/ppgan/models/discriminators/discriminator_firstorder.py
index 9a18d70e..a9369486 100644
--- a/paddlers/models/ppgan/models/discriminators/discriminator_firstorder.py
+++ b/paddlers/models/ppgan/models/discriminators/discriminator_firstorder.py
@@ -31,6 +31,7 @@ class FirstOrderDiscriminator(nn.Layer):
loss_weights:
discriminator_gan (int): weight of discriminator loss
"""
+
def __init__(self, discriminator_cfg, common_params, train_params):
super(FirstOrderDiscriminator, self).__init__()
self.discriminator = MultiScaleDiscriminator(**discriminator_cfg,
@@ -47,8 +48,8 @@ def forward(self, x, generated):
kp_driving = generated['kp_driving']
discriminator_maps_generated = self.discriminator(
pyramide_generated, kp=detach_kp(kp_driving))
- discriminator_maps_real = self.discriminator(pyramide_real,
- kp=detach_kp(kp_driving))
+ discriminator_maps_real = self.discriminator(
+ pyramide_real, kp=detach_kp(kp_driving))
loss_values = {}
value_total = 0
@@ -66,6 +67,7 @@ class DownBlock2d(nn.Layer):
"""
Simple block for processing video (encoder).
"""
+
def __init__(self,
in_features,
out_features,
@@ -74,16 +76,15 @@ def __init__(self,
pool=False,
sn=False):
super(DownBlock2d, self).__init__()
- self.conv = nn.Conv2D(in_features,
- out_features,
- kernel_size=kernel_size)
+ self.conv = nn.Conv2D(
+ in_features, out_features, kernel_size=kernel_size)
if sn:
self.conv = spectral_norm(self.conv)
else:
self.sn = None
if norm:
- self.norm = nn.InstanceNorm2D(num_features=out_features,
- epsilon=1e-05)
+ self.norm = nn.InstanceNorm2D(
+ num_features=out_features, epsilon=1e-05)
else:
self.norm = None
@@ -117,19 +118,21 @@ def __init__(self,
down_blocks = []
for i in range(num_blocks):
down_blocks.append(
- DownBlock2d(num_channels + num_kp * use_kp if i == 0 else min(
- max_features, block_expansion * (2**i)),
- min(max_features, block_expansion * (2**(i + 1))),
- norm=(i != 0),
- kernel_size=4,
- pool=(i != num_blocks - 1),
- sn=sn))
+ DownBlock2d(
+ num_channels + num_kp * use_kp
+ if i == 0 else min(max_features, block_expansion * (2**i)),
+ min(max_features, block_expansion * (2**(i + 1))),
+ norm=(i != 0),
+ kernel_size=4,
+ pool=(i != num_blocks - 1),
+ sn=sn))
self.down_blocks = nn.LayerList(down_blocks)
- self.conv = nn.Conv2D(self.down_blocks[len(self.down_blocks) -
- 1].conv.parameters()[0].shape[0],
- 1,
- kernel_size=1)
+ self.conv = nn.Conv2D(
+ self.down_blocks[len(self.down_blocks) - 1].conv.parameters()[0]
+ .shape[0],
+ 1,
+ kernel_size=1)
if sn:
self.conv = spectral_norm(self.conv)
else:
@@ -156,6 +159,7 @@ class MultiScaleDiscriminator(nn.Layer):
"""
    Multi-scale discriminator
"""
+
def __init__(self, scales=(), **kwargs):
super(MultiScaleDiscriminator, self).__init__()
self.scales = scales
diff --git a/paddlers/models/ppgan/models/discriminators/discriminator_lapstyle.py b/paddlers/models/ppgan/models/discriminators/discriminator_lapstyle.py
index 624cfd86..a6cb449b 100644
--- a/paddlers/models/ppgan/models/discriminators/discriminator_lapstyle.py
+++ b/paddlers/models/ppgan/models/discriminators/discriminator_lapstyle.py
@@ -25,27 +25,25 @@ def __init__(self):
num_layer = 3
num_channel = 32
self.head = nn.Sequential(
- ('conv',
- nn.Conv2D(3, num_channel, kernel_size=3, stride=1, padding=1)),
- ('norm', nn.BatchNorm2D(num_channel)),
+ ('conv', nn.Conv2D(
+ 3, num_channel, kernel_size=3, stride=1,
+ padding=1)), ('norm', nn.BatchNorm2D(num_channel)),
('LeakyRelu', nn.LeakyReLU(0.2)))
self.body = nn.Sequential()
for i in range(num_layer - 2):
self.body.add_sublayer(
'conv%d' % (i + 1),
- nn.Conv2D(num_channel,
- num_channel,
- kernel_size=3,
- stride=1,
- padding=1))
+ nn.Conv2D(
+ num_channel,
+ num_channel,
+ kernel_size=3,
+ stride=1,
+ padding=1))
self.body.add_sublayer('norm%d' % (i + 1),
nn.BatchNorm2D(num_channel))
self.body.add_sublayer('LeakyRelu%d' % (i + 1), nn.LeakyReLU(0.2))
- self.tail = nn.Conv2D(num_channel,
- 1,
- kernel_size=3,
- stride=1,
- padding=1)
+ self.tail = nn.Conv2D(
+ num_channel, 1, kernel_size=3, stride=1, padding=1)
def forward(self, x):
x = self.head(x)
diff --git a/paddlers/models/ppgan/models/discriminators/discriminator_photopen.py b/paddlers/models/ppgan/models/discriminators/discriminator_photopen.py
index 0e378a41..6a01d3ba 100644
--- a/paddlers/models/ppgan/models/discriminators/discriminator_photopen.py
+++ b/paddlers/models/ppgan/models/discriminators/discriminator_photopen.py
@@ -25,44 +25,47 @@
from .builder import DISCRIMINATORS
-
class NLayersDiscriminator(nn.Layer):
def __init__(self, opt):
super(NLayersDiscriminator, self).__init__()
-
+
kw = 4
padw = int(np.ceil((kw - 1.0) / 2))
nf = opt.ndf
input_nc = self.compute_D_input_nc(opt)
layer_count = 0
- layer = nn.Sequential(
- nn.Conv2D(input_nc, nf, kw, 2, padw),
- nn.GELU()
- )
- self.add_sublayer('block_'+str(layer_count), layer)
+ layer = nn.Sequential(nn.Conv2D(input_nc, nf, kw, 2, padw), nn.GELU())
+ self.add_sublayer('block_' + str(layer_count), layer)
layer_count += 1
- feat_size_prev = np.floor((opt.crop_size + padw * 2 - (kw - 2)) / 2).astype('int64')
+ feat_size_prev = np.floor(
+ (opt.crop_size + padw * 2 - (kw - 2)) / 2).astype('int64')
InstanceNorm = build_norm_layer('instance')
for n in range(1, opt.n_layers_D):
nf_prev = nf
nf = min(nf * 2, 512)
stride = 1 if n == opt.n_layers_D - 1 else 2
- feat_size = np.floor((feat_size_prev + padw * 2 - (kw - stride)) / stride).astype('int64')
+ feat_size = np.floor((feat_size_prev + padw * 2 - (kw - stride)) /
+ stride).astype('int64')
feat_size_prev = feat_size
layer = nn.Sequential(
- spectral_norm(nn.Conv2D(nf_prev, nf, kw, stride, padw,
- weight_attr=None,
- bias_attr=None)),
+ spectral_norm(
+ nn.Conv2D(
+ nf_prev,
+ nf,
+ kw,
+ stride,
+ padw,
+ weight_attr=None,
+ bias_attr=None)),
InstanceNorm(nf),
- nn.GELU()
- )
- self.add_sublayer('block_'+str(layer_count), layer)
+ nn.GELU())
+ self.add_sublayer('block_' + str(layer_count), layer)
layer_count += 1
layer = nn.Conv2D(nf, 1, kw, 1, padw)
- self.add_sublayer('block_'+str(layer_count), layer)
+ self.add_sublayer('block_' + str(layer_count), layer)
layer_count += 1
def forward(self, input):
@@ -80,22 +83,22 @@ def compute_D_input_nc(self, opt):
if not opt.no_instance:
input_nc += 1
return input_nc
-
+
+
@DISCRIMINATORS.register()
class MultiscaleDiscriminator(nn.Layer):
- def __init__(self,
- ndf,
- num_D,
- crop_size,
- label_nc,
- output_nc,
- contain_dontcare_label,
- no_instance,
- n_layers_D,
-
- ):
+ def __init__(
+ self,
+ ndf,
+ num_D,
+ crop_size,
+ label_nc,
+ output_nc,
+ contain_dontcare_label,
+ no_instance,
+ n_layers_D, ):
super(MultiscaleDiscriminator, self).__init__()
-
+
opt = {
'ndf': ndf,
'num_D': num_D,
@@ -105,7 +108,6 @@ def __init__(self,
'contain_dontcare_label': contain_dontcare_label,
'no_instance': no_instance,
'n_layers_D': n_layers_D,
-
}
opt = Dict(opt)
@@ -115,16 +117,16 @@ def __init__(self,
feat_size = opt.crop_size
for j in range(i):
sequence += [nn.AvgPool2D(3, 2, 1)]
- feat_size = np.floor((feat_size + 1 * 2 - (3 - 2)) / 2).astype('int64')
+ feat_size = np.floor(
+ (feat_size + 1 * 2 - (3 - 2)) / 2).astype('int64')
opt.crop_size = feat_size
sequence += [NLayersDiscriminator(opt)]
opt.crop_size = crop_size_bkp
sequence = nn.Sequential(*sequence)
- self.add_sublayer('nld_'+str(i), sequence)
+ self.add_sublayer('nld_' + str(i), sequence)
def forward(self, input):
output = []
for layer in self._sub_layers.values():
output.append(layer(input))
return output
-
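
`MultiscaleDiscriminator` runs `num_D` copies of `NLayersDiscriminator`, prefixing the i-th copy with i AvgPool2D(3, 2, 1) layers so each copy scores the input at half the previous resolution; `opt.crop_size` is rewritten per copy so the inner discriminator knows its feature size. A sketch of that bookkeeping (function name is illustrative):

import numpy as np

def pooled_size(size, num_pools, k=3, s=2, p=1):
    # Spatial size after num_pools AvgPool2D(3, 2, 1) layers, matching
    # the np.floor((size + 2*p - (k - s)) / s) arithmetic above.
    for _ in range(num_pools):
        size = int(np.floor((size + 2 * p - (k - s)) / s))
    return size

# e.g. pooled_size(256, 2) -> 64, the crop size seen by the third copy
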
diff --git a/paddlers/models/ppgan/models/discriminators/discriminator_ugatit.py b/paddlers/models/ppgan/models/discriminators/discriminator_ugatit.py
index 62e4da12..92e19827 100644
--- a/paddlers/models/ppgan/models/discriminators/discriminator_ugatit.py
+++ b/paddlers/models/ppgan/models/discriminators/discriminator_ugatit.py
@@ -13,63 +13,61 @@ class UGATITDiscriminator(nn.Layer):
def __init__(self, input_nc, ndf=64, n_layers=5):
super(UGATITDiscriminator, self).__init__()
model = [
- nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect"),
- spectral_norm(
- nn.Conv2D(input_nc,
- ndf,
- kernel_size=4,
- stride=2,
- padding=0,
- bias_attr=True)),
- nn.LeakyReLU(0.2)
+ nn.Pad2D(
+ padding=[1, 1, 1, 1], mode="reflect"), spectral_norm(
+ nn.Conv2D(
+ input_nc,
+ ndf,
+ kernel_size=4,
+ stride=2,
+ padding=0,
+ bias_attr=True)), nn.LeakyReLU(0.2)
]
for i in range(1, n_layers - 2):
mult = 2**(i - 1)
model += [
- nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect"),
- spectral_norm(
- nn.Conv2D(ndf * mult,
- ndf * mult * 2,
- kernel_size=4,
- stride=2,
- padding=0,
- bias_attr=True)),
- nn.LeakyReLU(0.2)
+ nn.Pad2D(
+ padding=[1, 1, 1, 1], mode="reflect"), spectral_norm(
+ nn.Conv2D(
+ ndf * mult,
+ ndf * mult * 2,
+ kernel_size=4,
+ stride=2,
+ padding=0,
+ bias_attr=True)), nn.LeakyReLU(0.2)
]
mult = 2**(n_layers - 2 - 1)
model += [
- nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect"),
- spectral_norm(
- nn.Conv2D(ndf * mult,
- ndf * mult * 2,
- kernel_size=4,
- stride=1,
- padding=0,
- bias_attr=True)),
- nn.LeakyReLU(0.2)
+ nn.Pad2D(
+ padding=[1, 1, 1, 1], mode="reflect"), spectral_norm(
+ nn.Conv2D(
+ ndf * mult,
+ ndf * mult * 2,
+ kernel_size=4,
+ stride=1,
+ padding=0,
+ bias_attr=True)), nn.LeakyReLU(0.2)
]
# Class Activation Map
mult = 2**(n_layers - 2)
self.gap_fc = spectral_norm(nn.Linear(ndf * mult, 1, bias_attr=False))
self.gmp_fc = spectral_norm(nn.Linear(ndf * mult, 1, bias_attr=False))
- self.conv1x1 = nn.Conv2D(ndf * mult * 2,
- ndf * mult,
- kernel_size=1,
- stride=1,
- bias_attr=True)
+ self.conv1x1 = nn.Conv2D(
+ ndf * mult * 2, ndf * mult, kernel_size=1, stride=1, bias_attr=True)
self.leaky_relu = nn.LeakyReLU(0.2)
self.pad = nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect")
self.conv = spectral_norm(
- nn.Conv2D(ndf * mult,
- 1,
- kernel_size=4,
- stride=1,
- padding=0,
- bias_attr=False))
+ nn.Conv2D(
+ ndf * mult,
+ 1,
+ kernel_size=4,
+ stride=1,
+ padding=0,
+ bias_attr=False))
self.model = nn.Sequential(*model)
diff --git a/paddlers/models/ppgan/models/discriminators/syncnet.py b/paddlers/models/ppgan/models/discriminators/syncnet.py
index 9fc3d26a..14295777 100644
--- a/paddlers/models/ppgan/models/discriminators/syncnet.py
+++ b/paddlers/models/ppgan/models/discriminators/syncnet.py
@@ -17,126 +17,70 @@ def __init__(self):
super(SyncNetColor, self).__init__()
self.face_encoder = nn.Sequential(
- ConvBNRelu(15, 32, kernel_size=(7, 7), stride=1, padding=3),
- ConvBNRelu(32, 64, kernel_size=5, stride=(1, 2), padding=1),
- ConvBNRelu(64,
- 64,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(64,
- 64,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(64, 128, kernel_size=3, stride=2, padding=1),
- ConvBNRelu(128,
- 128,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(128,
- 128,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(128,
- 128,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(128, 256, kernel_size=3, stride=2, padding=1),
- ConvBNRelu(256,
- 256,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(256,
- 256,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(256, 512, kernel_size=3, stride=2, padding=1),
- ConvBNRelu(512,
- 512,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(512,
- 512,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(512, 512, kernel_size=3, stride=2, padding=1),
- ConvBNRelu(512, 512, kernel_size=3, stride=1, padding=0),
- ConvBNRelu(512, 512, kernel_size=1, stride=1, padding=0),
- )
+ ConvBNRelu(
+ 15, 32, kernel_size=(7, 7), stride=1, padding=3),
+ ConvBNRelu(
+ 32, 64, kernel_size=5, stride=(1, 2), padding=1),
+ ConvBNRelu(
+ 64, 64, kernel_size=3, stride=1, padding=1, residual=True),
+ ConvBNRelu(
+ 64, 64, kernel_size=3, stride=1, padding=1, residual=True),
+ ConvBNRelu(
+ 64, 128, kernel_size=3, stride=2, padding=1),
+ ConvBNRelu(
+ 128, 128, kernel_size=3, stride=1, padding=1, residual=True),
+ ConvBNRelu(
+ 128, 128, kernel_size=3, stride=1, padding=1, residual=True),
+ ConvBNRelu(
+ 128, 128, kernel_size=3, stride=1, padding=1, residual=True),
+ ConvBNRelu(
+ 128, 256, kernel_size=3, stride=2, padding=1),
+ ConvBNRelu(
+ 256, 256, kernel_size=3, stride=1, padding=1, residual=True),
+ ConvBNRelu(
+ 256, 256, kernel_size=3, stride=1, padding=1, residual=True),
+ ConvBNRelu(
+ 256, 512, kernel_size=3, stride=2, padding=1),
+ ConvBNRelu(
+ 512, 512, kernel_size=3, stride=1, padding=1, residual=True),
+ ConvBNRelu(
+ 512, 512, kernel_size=3, stride=1, padding=1, residual=True),
+ ConvBNRelu(
+ 512, 512, kernel_size=3, stride=2, padding=1),
+ ConvBNRelu(
+ 512, 512, kernel_size=3, stride=1, padding=0),
+ ConvBNRelu(
+ 512, 512, kernel_size=1, stride=1, padding=0), )
self.audio_encoder = nn.Sequential(
- ConvBNRelu(1, 32, kernel_size=3, stride=1, padding=1),
- ConvBNRelu(32,
- 32,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(32,
- 32,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(32, 64, kernel_size=3, stride=(3, 1), padding=1),
- ConvBNRelu(64,
- 64,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(64,
- 64,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(64, 128, kernel_size=3, stride=3, padding=1),
- ConvBNRelu(128,
- 128,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(128,
- 128,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(128, 256, kernel_size=3, stride=(3, 2), padding=1),
- ConvBNRelu(256,
- 256,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(256,
- 256,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(256, 512, kernel_size=3, stride=1, padding=0),
- ConvBNRelu(512, 512, kernel_size=1, stride=1, padding=0),
- )
+ ConvBNRelu(
+ 1, 32, kernel_size=3, stride=1, padding=1),
+ ConvBNRelu(
+ 32, 32, kernel_size=3, stride=1, padding=1, residual=True),
+ ConvBNRelu(
+ 32, 32, kernel_size=3, stride=1, padding=1, residual=True),
+ ConvBNRelu(
+ 32, 64, kernel_size=3, stride=(3, 1), padding=1),
+ ConvBNRelu(
+ 64, 64, kernel_size=3, stride=1, padding=1, residual=True),
+ ConvBNRelu(
+ 64, 64, kernel_size=3, stride=1, padding=1, residual=True),
+ ConvBNRelu(
+ 64, 128, kernel_size=3, stride=3, padding=1),
+ ConvBNRelu(
+ 128, 128, kernel_size=3, stride=1, padding=1, residual=True),
+ ConvBNRelu(
+ 128, 128, kernel_size=3, stride=1, padding=1, residual=True),
+ ConvBNRelu(
+ 128, 256, kernel_size=3, stride=(3, 2), padding=1),
+ ConvBNRelu(
+ 256, 256, kernel_size=3, stride=1, padding=1, residual=True),
+ ConvBNRelu(
+ 256, 256, kernel_size=3, stride=1, padding=1, residual=True),
+ ConvBNRelu(
+ 256, 512, kernel_size=3, stride=1, padding=0),
+ ConvBNRelu(
+ 512, 512, kernel_size=1, stride=1, padding=0), )
def forward(self, audio_sequences,
face_sequences): # audio_sequences := (B, dim, T)
diff --git a/paddlers/models/ppgan/models/discriminators/vgg_discriminator.py b/paddlers/models/ppgan/models/discriminators/vgg_discriminator.py
index 74b6112a..1bcfa2dc 100644
--- a/paddlers/models/ppgan/models/discriminators/vgg_discriminator.py
+++ b/paddlers/models/ppgan/models/discriminators/vgg_discriminator.py
@@ -16,6 +16,7 @@ class VGGDiscriminator128(nn.Layer):
num_feat (int): Channel number of base intermediate features.
Default: 64.
"""
+
def __init__(self, in_channels, num_feat, norm_layer='batch'):
super(VGGDiscriminator128, self).__init__()
@@ -23,64 +24,32 @@ def __init__(self, in_channels, num_feat, norm_layer='batch'):
self.conv0_1 = nn.Conv2D(num_feat, num_feat, 4, 2, 1, bias_attr=False)
self.bn0_1 = nn.BatchNorm2D(num_feat)
- self.conv1_0 = nn.Conv2D(num_feat,
- num_feat * 2,
- 3,
- 1,
- 1,
- bias_attr=False)
+ self.conv1_0 = nn.Conv2D(
+ num_feat, num_feat * 2, 3, 1, 1, bias_attr=False)
self.bn1_0 = nn.BatchNorm2D(num_feat * 2)
- self.conv1_1 = nn.Conv2D(num_feat * 2,
- num_feat * 2,
- 4,
- 2,
- 1,
- bias_attr=False)
+ self.conv1_1 = nn.Conv2D(
+ num_feat * 2, num_feat * 2, 4, 2, 1, bias_attr=False)
self.bn1_1 = nn.BatchNorm2D(num_feat * 2)
- self.conv2_0 = nn.Conv2D(num_feat * 2,
- num_feat * 4,
- 3,
- 1,
- 1,
- bias_attr=False)
+ self.conv2_0 = nn.Conv2D(
+ num_feat * 2, num_feat * 4, 3, 1, 1, bias_attr=False)
self.bn2_0 = nn.BatchNorm2D(num_feat * 4)
- self.conv2_1 = nn.Conv2D(num_feat * 4,
- num_feat * 4,
- 4,
- 2,
- 1,
- bias_attr=False)
+ self.conv2_1 = nn.Conv2D(
+ num_feat * 4, num_feat * 4, 4, 2, 1, bias_attr=False)
self.bn2_1 = nn.BatchNorm2D(num_feat * 4)
- self.conv3_0 = nn.Conv2D(num_feat * 4,
- num_feat * 8,
- 3,
- 1,
- 1,
- bias_attr=False)
+ self.conv3_0 = nn.Conv2D(
+ num_feat * 4, num_feat * 8, 3, 1, 1, bias_attr=False)
self.bn3_0 = nn.BatchNorm2D(num_feat * 8)
- self.conv3_1 = nn.Conv2D(num_feat * 8,
- num_feat * 8,
- 4,
- 2,
- 1,
- bias_attr=False)
+ self.conv3_1 = nn.Conv2D(
+ num_feat * 8, num_feat * 8, 4, 2, 1, bias_attr=False)
self.bn3_1 = nn.BatchNorm2D(num_feat * 8)
- self.conv4_0 = nn.Conv2D(num_feat * 8,
- num_feat * 8,
- 3,
- 1,
- 1,
- bias_attr=False)
+ self.conv4_0 = nn.Conv2D(
+ num_feat * 8, num_feat * 8, 3, 1, 1, bias_attr=False)
self.bn4_0 = nn.BatchNorm2D(num_feat * 8)
- self.conv4_1 = nn.Conv2D(num_feat * 8,
- num_feat * 8,
- 4,
- 2,
- 1,
- bias_attr=False)
+ self.conv4_1 = nn.Conv2D(
+ num_feat * 8, num_feat * 8, 4, 2, 1, bias_attr=False)
self.bn4_1 = nn.BatchNorm2D(num_feat * 8)
self.linear1 = nn.Linear(num_feat * 8 * 4 * 4, 100)
@@ -95,24 +64,24 @@ def forward(self, x):
f'but received {x.shape}.')
feat = self.lrelu(self.conv0_0(x))
- feat = self.lrelu(self.bn0_1(
- self.conv0_1(feat))) # output spatial size: (64, 64)
+ feat = self.lrelu(
+ self.bn0_1(self.conv0_1(feat))) # output spatial size: (64, 64)
feat = self.lrelu(self.bn1_0(self.conv1_0(feat)))
- feat = self.lrelu(self.bn1_1(
- self.conv1_1(feat))) # output spatial size: (32, 32)
+ feat = self.lrelu(
+ self.bn1_1(self.conv1_1(feat))) # output spatial size: (32, 32)
feat = self.lrelu(self.bn2_0(self.conv2_0(feat)))
- feat = self.lrelu(self.bn2_1(
- self.conv2_1(feat))) # output spatial size: (16, 16)
+ feat = self.lrelu(
+ self.bn2_1(self.conv2_1(feat))) # output spatial size: (16, 16)
feat = self.lrelu(self.bn3_0(self.conv3_0(feat)))
- feat = self.lrelu(self.bn3_1(
- self.conv3_1(feat))) # output spatial size: (8, 8)
+ feat = self.lrelu(
+ self.bn3_1(self.conv3_1(feat))) # output spatial size: (8, 8)
feat = self.lrelu(self.bn4_0(self.conv4_0(feat)))
- feat = self.lrelu(self.bn4_1(
- self.conv4_1(feat))) # output spatial size: (4, 4)
+ feat = self.lrelu(
+ self.bn4_1(self.conv4_1(feat))) # output spatial size: (4, 4)
feat = feat.reshape([feat.shape[0], -1])
feat = self.lrelu(self.linear1(feat))
diff --git a/paddlers/models/ppgan/models/discriminators/wav2lip_disc_qual.py b/paddlers/models/ppgan/models/discriminators/wav2lip_disc_qual.py
index ea1f9c8f..3b22ee34 100644
--- a/paddlers/models/ppgan/models/discriminators/wav2lip_disc_qual.py
+++ b/paddlers/models/ppgan/models/discriminators/wav2lip_disc_qual.py
@@ -17,37 +17,43 @@ def __init__(self):
self.face_encoder_blocks = nn.LayerList([
nn.Sequential(
- NonNormConv2d(3, 32, kernel_size=7, stride=1,
- padding=3)), # 48,96
+ NonNormConv2d(
+ 3, 32, kernel_size=7, stride=1, padding=3)), # 48,96
nn.Sequential(
- NonNormConv2d(32, 64, kernel_size=5, stride=(1, 2),
- padding=2), # 48,48
- NonNormConv2d(64, 64, kernel_size=5, stride=1, padding=2)),
+ NonNormConv2d(
+ 32, 64, kernel_size=5, stride=(1, 2), padding=2), # 48,48
+ NonNormConv2d(
+ 64, 64, kernel_size=5, stride=1, padding=2)),
nn.Sequential(
- NonNormConv2d(64, 128, kernel_size=5, stride=2,
- padding=2), # 24,24
- NonNormConv2d(128, 128, kernel_size=5, stride=1, padding=2)),
+ NonNormConv2d(
+ 64, 128, kernel_size=5, stride=2, padding=2), # 24,24
+ NonNormConv2d(
+ 128, 128, kernel_size=5, stride=1, padding=2)),
nn.Sequential(
- NonNormConv2d(128, 256, kernel_size=5, stride=2,
- padding=2), # 12,12
- NonNormConv2d(256, 256, kernel_size=5, stride=1, padding=2)),
+ NonNormConv2d(
+ 128, 256, kernel_size=5, stride=2, padding=2), # 12,12
+ NonNormConv2d(
+ 256, 256, kernel_size=5, stride=1, padding=2)),
nn.Sequential(
- NonNormConv2d(256, 512, kernel_size=3, stride=2,
- padding=1), # 6,6
- NonNormConv2d(512, 512, kernel_size=3, stride=1, padding=1)),
+ NonNormConv2d(
+ 256, 512, kernel_size=3, stride=2, padding=1), # 6,6
+ NonNormConv2d(
+ 512, 512, kernel_size=3, stride=1, padding=1)),
nn.Sequential(
- NonNormConv2d(512, 512, kernel_size=3, stride=2,
- padding=1), # 3,3
- NonNormConv2d(512, 512, kernel_size=3, stride=1, padding=1),
- ),
+ NonNormConv2d(
+ 512, 512, kernel_size=3, stride=2, padding=1), # 3,3
+ NonNormConv2d(
+ 512, 512, kernel_size=3, stride=1, padding=1), ),
nn.Sequential(
- NonNormConv2d(512, 512, kernel_size=3, stride=1,
- padding=0), # 1, 1
- NonNormConv2d(512, 512, kernel_size=1, stride=1, padding=0)),
+ NonNormConv2d(
+ 512, 512, kernel_size=3, stride=1, padding=0), # 1, 1
+ NonNormConv2d(
+ 512, 512, kernel_size=1, stride=1, padding=0)),
])
self.binary_pred = nn.Sequential(
- nn.Conv2D(512, 1, kernel_size=1, stride=1, padding=0), nn.Sigmoid())
+ nn.Conv2D(
+ 512, 1, kernel_size=1, stride=1, padding=0), nn.Sigmoid())
self.label_noise = .0
def get_lower_half(self, face_sequences):
@@ -71,8 +77,9 @@ def perceptual_forward(self, false_face_sequences):
binary_pred = self.binary_pred(false_feats).reshape(
(len(false_feats), -1))
- false_pred_loss = F.binary_cross_entropy(
- binary_pred, paddle.ones((len(false_feats), 1)))
+ false_pred_loss = F.binary_cross_entropy(binary_pred,
+ paddle.ones(
+ (len(false_feats), 1)))
return false_pred_loss
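
`perceptual_forward` is the generator-side adversarial term: the fake faces' discriminator scores are pushed toward 1 with binary cross-entropy. Reduced to its essentials (a sketch, not the full method):

import paddle
import paddle.nn.functional as F

def generator_adv_loss(fake_scores):
    # fake_scores: (N, 1) sigmoid outputs of the discriminator on fakes.
    return F.binary_cross_entropy(fake_scores,
                                  paddle.ones_like(fake_scores))
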
diff --git a/paddlers/models/ppgan/models/drn_model.py b/paddlers/models/ppgan/models/drn_model.py
index ce44e988..8e8e91a6 100644
--- a/paddlers/models/ppgan/models/drn_model.py
+++ b/paddlers/models/ppgan/models/drn_model.py
@@ -32,6 +32,7 @@ class DRN(BaseSRModel):
DRN paper: https://arxiv.org/pdf/1809.00219.pdf
"""
+
def __init__(self,
generator,
lq_loss_weight=0.1,
@@ -82,8 +83,8 @@ def setup_input(self, input):
self.lq = paddle.to_tensor(input['lq'])
self.visual_items['lq'] = self.lq
- if isinstance(self.scale, (list, tuple)) and len(
- self.scale) == 2 and 'lqx2' in input:
+ if isinstance(self.scale, (
+ list, tuple)) and len(self.scale) == 2 and 'lqx2' in input:
self.lqx2 = input['lqx2']
if 'gt' in input:
diff --git a/paddlers/models/ppgan/models/edvr_model.py b/paddlers/models/ppgan/models/edvr_model.py
index 3fa270d5..4b47aeb4 100644
--- a/paddlers/models/ppgan/models/edvr_model.py
+++ b/paddlers/models/ppgan/models/edvr_model.py
@@ -27,6 +27,7 @@ class EDVRModel(BaseSRModel):
Paper: EDVR: Video Restoration with Enhanced Deformable Convolutional Networks.
"""
+
def __init__(self, generator, tsa_iter, pixel_criterion=None):
"""Initialize the EDVR class.
@@ -77,8 +78,8 @@ def train_iter(self, optims=None):
def init_edvr_weight(net):
def reset_func(m):
- if hasattr(m, 'weight') and (not isinstance(
- m, (nn.BatchNorm, nn.BatchNorm2D))) and (
+ if hasattr(m, 'weight') and (
+ not isinstance(m, (nn.BatchNorm, nn.BatchNorm2D))) and (
not isinstance(m, ResidualBlockNoBN) and
(not isinstance(m, DCNPack))):
reset_parameters(m)
diff --git a/paddlers/models/ppgan/models/esrgan_model.py b/paddlers/models/ppgan/models/esrgan_model.py
index fe67cff0..ede63782 100644
--- a/paddlers/models/ppgan/models/esrgan_model.py
+++ b/paddlers/models/ppgan/models/esrgan_model.py
@@ -29,6 +29,7 @@ class ESRGAN(BaseSRModel):
ESRGAN paper: https://arxiv.org/pdf/1809.00219.pdf
"""
+
def __init__(self,
generator,
discriminator=None,
@@ -71,8 +72,8 @@ def train_iter(self, optimizers=None):
l_total += l_pix
self.losses['loss_pix'] = l_pix
if self.perceptual_criterion:
- l_g_percep, l_g_style = self.perceptual_criterion(
- self.output, self.gt)
+ l_g_percep, l_g_style = self.perceptual_criterion(self.output,
+ self.gt)
# l_total += l_pix
if l_g_percep is not None:
l_total += l_g_percep
@@ -86,14 +87,10 @@ def train_iter(self, optimizers=None):
self.set_requires_grad(self.nets['discriminator'], False)
real_d_pred = self.nets['discriminator'](self.gt).detach()
fake_g_pred = self.nets['discriminator'](self.output)
- l_g_real = self.gan_criterion(real_d_pred -
- paddle.mean(fake_g_pred),
- False,
- is_disc=False)
- l_g_fake = self.gan_criterion(fake_g_pred -
- paddle.mean(real_d_pred),
- True,
- is_disc=False)
+ l_g_real = self.gan_criterion(
+ real_d_pred - paddle.mean(fake_g_pred), False, is_disc=False)
+ l_g_fake = self.gan_criterion(
+ fake_g_pred - paddle.mean(real_d_pred), True, is_disc=False)
l_g_gan = (l_g_real + l_g_fake) / 2
l_total += l_g_gan
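
This is the relativistic average GAN generator loss from ESRGAN: real logits are judged relative to the mean fake logit (target: fake) and fake logits relative to the mean real logit (target: real), with the two halves averaged. A sketch with BCE-with-logits standing in for the configurable `gan_criterion` (an assumption):

import paddle
import paddle.nn.functional as F

def relativistic_g_loss(real_pred, fake_pred):
    # real_pred is detached in the generator step, as in the code above.
    l_g_real = F.binary_cross_entropy_with_logits(
        real_pred - paddle.mean(fake_pred), paddle.zeros_like(real_pred))
    l_g_fake = F.binary_cross_entropy_with_logits(
        fake_pred - paddle.mean(real_pred), paddle.ones_like(fake_pred))
    return (l_g_real + l_g_fake) / 2
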
diff --git a/paddlers/models/ppgan/models/gan_model.py b/paddlers/models/ppgan/models/gan_model.py
index cfe157d2..d0583532 100644
--- a/paddlers/models/ppgan/models/gan_model.py
+++ b/paddlers/models/ppgan/models/gan_model.py
@@ -32,6 +32,7 @@ class GANModel(BaseModel):
vanilla GAN paper: https://arxiv.org/abs/1406.2661
"""
+
def __init__(self,
generator,
discriminator=None,
@@ -50,12 +51,12 @@ def __init__(self,
self.disc_iters = 1 if self.params is None else self.params.get(
'disc_iters', 1)
- self.disc_start_iters = (0 if self.params is None else self.params.get(
- 'disc_start_iters', 0))
- self.samples_every_row = (8 if self.params is None else self.params.get(
- 'samples_every_row', 8))
- self.visual_interval = (500 if self.params is None else self.params.get(
- 'visual_interval', 500))
+ self.disc_start_iters = (0 if self.params is None else
+ self.params.get('disc_start_iters', 0))
+ self.samples_every_row = (8 if self.params is None else
+ self.params.get('samples_every_row', 8))
+ self.visual_interval = (500 if self.params is None else
+ self.params.get('visual_interval', 500))
# define generator
self.nets['netG'] = build_generator(generator)
@@ -89,7 +90,8 @@ def setup_input(self, input):
self.n_class = self.nets['netG'].n_class
self.D_real_inputs += [
- paddle.to_tensor(input['class_id'], dtype='int64')
+ paddle.to_tensor(
+ input['class_id'], dtype='int64')
]
else:
self.n_class = 0
@@ -110,9 +112,7 @@ def setup_input(self, input):
rows_num = (batch_size - 1) // self.samples_every_row + 1
class_ids = paddle.randint(0, self.n_class, [rows_num, 1])
class_ids = class_ids.tile([1, self.samples_every_row])
- class_ids = class_ids.reshape([
- -1,
- ])[:batch_size].detach()
+ class_ids = class_ids.reshape([-1, ])[:batch_size].detach()
self.G_fixed_inputs[1] = class_ids.detach()
def forward(self):
@@ -143,8 +143,8 @@ def backward_D(self):
# combine loss and calculate gradients
if self.criterionGAN.gan_mode in ['vanilla', 'lsgan']:
- self.loss_D = self.loss_D + (self.loss_D_fake +
- self.loss_D_real) * 0.5
+ self.loss_D = self.loss_D + (self.loss_D_fake + self.loss_D_real
+ ) * 0.5
else:
self.loss_D = self.loss_D + self.loss_D_fake + self.loss_D_real
diff --git a/paddlers/models/ppgan/models/generators/basicvsr_plus_plus.py b/paddlers/models/ppgan/models/generators/basicvsr_plus_plus.py
index b0783258..cf500390 100644
--- a/paddlers/models/ppgan/models/generators/basicvsr_plus_plus.py
+++ b/paddlers/models/ppgan/models/generators/basicvsr_plus_plus.py
@@ -45,6 +45,7 @@ class BasicVSRPlusPlus(nn.Layer):
or not. If False, the output resolution is equal to the input
resolution. Default: True.
"""
+
def __init__(self, mid_channels=64, num_blocks=7, is_low_res_input=True):
super().__init__()
@@ -88,21 +89,16 @@ def __init__(self, mid_channels=64, num_blocks=7, is_low_res_input=True):
5 * mid_channels, mid_channels, num_blocks)
# upsampling module
- self.reconstruction = ResidualBlocksWithInputConv(
- 5 * mid_channels, mid_channels, 5)
- self.upsample1 = PixelShufflePack(mid_channels,
- mid_channels,
- 2,
- upsample_kernel=3)
- self.upsample2 = PixelShufflePack(mid_channels,
- 64,
- 2,
- upsample_kernel=3)
+ self.reconstruction = ResidualBlocksWithInputConv(5 * mid_channels,
+ mid_channels, 5)
+ self.upsample1 = PixelShufflePack(
+ mid_channels, mid_channels, 2, upsample_kernel=3)
+ self.upsample2 = PixelShufflePack(
+ mid_channels, 64, 2, upsample_kernel=3)
self.conv_hr = nn.Conv2D(64, 64, 3, 1, 1)
self.conv_last = nn.Conv2D(64, 3, 3, 1, 1)
- self.img_upsample = nn.Upsample(scale_factor=4,
- mode='bilinear',
- align_corners=False)
+ self.img_upsample = nn.Upsample(
+ scale_factor=4, mode='bilinear', align_corners=False)
# activation function
self.lrelu = nn.LeakyReLU(negative_slope=0.1)
@@ -198,10 +194,9 @@ def forward(self, lqs):
if self.is_low_res_input:
lqs_downsample = lqs
else:
- lqs_downsample = F.interpolate(lqs.reshape([-1, c, h, w]),
- scale_factor=0.25,
- mode='bicubic').reshape(
- [n, t, c, h // 4, w // 4])
+ lqs_downsample = F.interpolate(
+ lqs.reshape([-1, c, h, w]), scale_factor=0.25,
+ mode='bicubic').reshape([n, t, c, h // 4, w // 4])
# check whether the input is an extended sequence
self.check_if_mirror_extended(lqs)
@@ -213,9 +208,10 @@ def forward(self, lqs):
feats['spatial'] = [feats_[:, i, :, :, :] for i in range(0, t)]
# compute optical flow using the low-res inputs
- assert lqs_downsample.shape[3] >= 64 and lqs_downsample.shape[4] >= 64, (
- 'The height and width of low-res inputs must be at least 64, '
- f'but got {h} and {w}.')
+ assert lqs_downsample.shape[3] >= 64 and lqs_downsample.shape[
+ 4] >= 64, (
+ 'The height and width of low-res inputs must be at least 64, '
+ f'but got {h} and {w}.')
flows_forward, flows_backward = self.compute_flow(lqs_downsample)
        # feature propagation
@@ -253,20 +249,20 @@ def forward(self, lqs):
flow_n2 = flow_n1 + flow_warp(
flow_n2, flow_n1.transpose([0, 2, 3, 1]))
- cond_n2 = flow_warp(feat_n2, flow_n2.transpose([0, 2, 3,
- 1]))
+ cond_n2 = flow_warp(feat_n2,
+ flow_n2.transpose([0, 2, 3, 1]))
# flow-guided deformable convolution
cond = paddle.concat([cond_n1, feat_current, cond_n2], axis=1)
feat_prop = paddle.concat([feat_prop, feat_n2], axis=1)
- feat_prop = self.deform_align_backward_1(
- feat_prop, cond, flow_n1, flow_n2)
+ feat_prop = self.deform_align_backward_1(feat_prop, cond,
+ flow_n1, flow_n2)
# concatenate and residual blocks
feat = [feat_current] + [
- feats[k][idx]
- for k in feats if k not in ['spatial', 'backward_1']
+ feats[k][idx] for k in feats
+ if k not in ['spatial', 'backward_1']
] + [feat_prop]
feat = paddle.concat(feat, axis=1)
@@ -308,8 +304,8 @@ def forward(self, lqs):
flow_n2 = flow_n1 + flow_warp(
flow_n2, flow_n1.transpose([0, 2, 3, 1]))
- cond_n2 = flow_warp(feat_n2, flow_n2.transpose([0, 2, 3,
- 1]))
+ cond_n2 = flow_warp(feat_n2,
+ flow_n2.transpose([0, 2, 3, 1]))
# flow-guided deformable convolution
cond = paddle.concat([cond_n1, feat_current, cond_n2], axis=1)
@@ -320,8 +316,8 @@ def forward(self, lqs):
# concatenate and residual blocks
feat = [feat_current] + [
- feats[k][idx]
- for k in feats if k not in ['spatial', 'forward_1']
+ feats[k][idx] for k in feats
+ if k not in ['spatial', 'forward_1']
] + [feat_prop]
feat = paddle.concat(feat, axis=1)
@@ -361,20 +357,20 @@ def forward(self, lqs):
flow_n2 = flow_n1 + flow_warp(
flow_n2, flow_n1.transpose([0, 2, 3, 1]))
- cond_n2 = flow_warp(feat_n2, flow_n2.transpose([0, 2, 3,
- 1]))
+ cond_n2 = flow_warp(feat_n2,
+ flow_n2.transpose([0, 2, 3, 1]))
# flow-guided deformable convolution
cond = paddle.concat([cond_n1, feat_current, cond_n2], axis=1)
feat_prop = paddle.concat([feat_prop, feat_n2], axis=1)
- feat_prop = self.deform_align_backward_2(
- feat_prop, cond, flow_n1, flow_n2)
+ feat_prop = self.deform_align_backward_2(feat_prop, cond,
+ flow_n1, flow_n2)
# concatenate and residual blocks
feat = [feat_current] + [
- feats[k][idx]
- for k in feats if k not in ['spatial', 'backward_2']
+ feats[k][idx] for k in feats
+ if k not in ['spatial', 'backward_2']
] + [feat_prop]
feat = paddle.concat(feat, axis=1)
@@ -416,8 +412,8 @@ def forward(self, lqs):
flow_n2 = flow_n1 + flow_warp(
flow_n2, flow_n1.transpose([0, 2, 3, 1]))
- cond_n2 = flow_warp(feat_n2, flow_n2.transpose([0, 2, 3,
- 1]))
+ cond_n2 = flow_warp(feat_n2,
+ flow_n2.transpose([0, 2, 3, 1]))
# flow-guided deformable convolution
cond = paddle.concat([cond_n1, feat_current, cond_n2], axis=1)
@@ -428,8 +424,8 @@ def forward(self, lqs):
# concatenate and residual blocks
feat = [feat_current] + [
- feats[k][idx]
- for k in feats if k not in ['spatial', 'forward_2']
+ feats[k][idx] for k in feats
+ if k not in ['spatial', 'forward_2']
] + [feat_prop]
feat = paddle.concat(feat, axis=1)
diff --git a/paddlers/models/ppgan/models/generators/edvr.py b/paddlers/models/ppgan/models/generators/edvr.py
index 1a578394..3603c945 100644
--- a/paddlers/models/ppgan/models/generators/edvr.py
+++ b/paddlers/models/ppgan/models/generators/edvr.py
@@ -62,6 +62,7 @@ class ResidualBlockNoBN(nn.Layer):
nf (int): Channel number of intermediate features.
Default: 64.
"""
+
def __init__(self, nf=64):
super(ResidualBlockNoBN, self).__init__()
self.nf = nf
@@ -100,6 +101,7 @@ class PredeblurResNetPyramid(nn.Layer):
nf (int): Channel number of intermediate features. Default: 64.
HR_in (bool): Whether the input has high resolution. Default: False.
"""
+
def __init__(self, in_nf=3, nf=64, HR_in=False):
super(PredeblurResNetPyramid, self).__init__()
self.in_nf = in_nf
@@ -107,27 +109,31 @@ def __init__(self, in_nf=3, nf=64, HR_in=False):
self.HR_in = True if HR_in else False
self.Leaky_relu = nn.LeakyReLU(negative_slope=0.1)
if self.HR_in:
- self.conv_first_1 = nn.Conv2D(in_channels=self.in_nf,
- out_channels=self.nf,
- kernel_size=3,
- stride=1,
- padding=1)
- self.conv_first_2 = nn.Conv2D(in_channels=self.nf,
- out_channels=self.nf,
- kernel_size=3,
- stride=2,
- padding=1)
- self.conv_first_3 = nn.Conv2D(in_channels=self.nf,
- out_channels=self.nf,
- kernel_size=3,
- stride=2,
- padding=1)
+ self.conv_first_1 = nn.Conv2D(
+ in_channels=self.in_nf,
+ out_channels=self.nf,
+ kernel_size=3,
+ stride=1,
+ padding=1)
+ self.conv_first_2 = nn.Conv2D(
+ in_channels=self.nf,
+ out_channels=self.nf,
+ kernel_size=3,
+ stride=2,
+ padding=1)
+ self.conv_first_3 = nn.Conv2D(
+ in_channels=self.nf,
+ out_channels=self.nf,
+ kernel_size=3,
+ stride=2,
+ padding=1)
else:
- self.conv_first = nn.Conv2D(in_channels=self.in_nf,
- out_channels=self.nf,
- kernel_size=3,
- stride=1,
- padding=1)
+ self.conv_first = nn.Conv2D(
+ in_channels=self.in_nf,
+ out_channels=self.nf,
+ kernel_size=3,
+ stride=1,
+ padding=1)
self.RB_L1_1 = ResidualBlockNoBN(nf=self.nf)
self.RB_L1_2 = ResidualBlockNoBN(nf=self.nf)
self.RB_L1_3 = ResidualBlockNoBN(nf=self.nf)
@@ -136,20 +142,20 @@ def __init__(self, in_nf=3, nf=64, HR_in=False):
self.RB_L2_1 = ResidualBlockNoBN(nf=self.nf)
self.RB_L2_2 = ResidualBlockNoBN(nf=self.nf)
self.RB_L3_1 = ResidualBlockNoBN(nf=self.nf)
- self.deblur_L2_conv = nn.Conv2D(in_channels=self.nf,
- out_channels=self.nf,
- kernel_size=3,
- stride=2,
- padding=1)
- self.deblur_L3_conv = nn.Conv2D(in_channels=self.nf,
- out_channels=self.nf,
- kernel_size=3,
- stride=2,
- padding=1)
- self.upsample = nn.Upsample(scale_factor=2,
- mode="bilinear",
- align_corners=False,
- align_mode=0)
+ self.deblur_L2_conv = nn.Conv2D(
+ in_channels=self.nf,
+ out_channels=self.nf,
+ kernel_size=3,
+ stride=2,
+ padding=1)
+ self.deblur_L3_conv = nn.Conv2D(
+ in_channels=self.nf,
+ out_channels=self.nf,
+ kernel_size=3,
+ stride=2,
+ padding=1)
+ self.upsample = nn.Upsample(
+ scale_factor=2, mode="bilinear", align_corners=False, align_mode=0)
def forward(self, x):
if self.HR_in:
@@ -189,6 +195,7 @@ class TSAFusion(nn.Layer):
nframes (int): Number of frames. Default: 5.
center (int): The index of center frame. Default: 2.
"""
+
def __init__(self, nf=64, nframes=5, center=2):
super(TSAFusion, self).__init__()
self.nf = nf
@@ -196,81 +203,88 @@ def __init__(self, nf=64, nframes=5, center=2):
self.center = center
self.sigmoid = nn.Sigmoid()
self.Leaky_relu = nn.LeakyReLU(negative_slope=0.1)
- self.tAtt_2 = nn.Conv2D(in_channels=self.nf,
- out_channels=self.nf,
- kernel_size=3,
- stride=1,
- padding=1)
- self.tAtt_1 = nn.Conv2D(in_channels=self.nf,
- out_channels=self.nf,
- kernel_size=3,
- stride=1,
- padding=1)
- self.fea_fusion = nn.Conv2D(in_channels=self.nf * self.nframes,
- out_channels=self.nf,
- kernel_size=1,
- stride=1,
- padding=0)
- self.sAtt_1 = nn.Conv2D(in_channels=self.nf * self.nframes,
- out_channels=self.nf,
- kernel_size=1,
- stride=1,
- padding=0)
+ self.tAtt_2 = nn.Conv2D(
+ in_channels=self.nf,
+ out_channels=self.nf,
+ kernel_size=3,
+ stride=1,
+ padding=1)
+ self.tAtt_1 = nn.Conv2D(
+ in_channels=self.nf,
+ out_channels=self.nf,
+ kernel_size=3,
+ stride=1,
+ padding=1)
+ self.fea_fusion = nn.Conv2D(
+ in_channels=self.nf * self.nframes,
+ out_channels=self.nf,
+ kernel_size=1,
+ stride=1,
+ padding=0)
+ self.sAtt_1 = nn.Conv2D(
+ in_channels=self.nf * self.nframes,
+ out_channels=self.nf,
+ kernel_size=1,
+ stride=1,
+ padding=0)
self.max_pool = nn.MaxPool2D(3, stride=2, padding=1)
self.avg_pool = nn.AvgPool2D(3, stride=2, padding=1, exclusive=False)
- self.sAtt_2 = nn.Conv2D(in_channels=2 * self.nf,
- out_channels=self.nf,
- kernel_size=1,
- stride=1,
- padding=0)
- self.sAtt_3 = nn.Conv2D(in_channels=self.nf,
- out_channels=self.nf,
- kernel_size=3,
- stride=1,
- padding=1)
+ self.sAtt_2 = nn.Conv2D(
+ in_channels=2 * self.nf,
+ out_channels=self.nf,
+ kernel_size=1,
+ stride=1,
+ padding=0)
+ self.sAtt_3 = nn.Conv2D(
+ in_channels=self.nf,
+ out_channels=self.nf,
+ kernel_size=3,
+ stride=1,
+ padding=1)
self.sAtt_4 = nn.Conv2D(
in_channels=self.nf,
out_channels=self.nf,
kernel_size=1,
stride=1,
- padding=0,
- )
- self.sAtt_5 = nn.Conv2D(in_channels=self.nf,
- out_channels=self.nf,
- kernel_size=3,
- stride=1,
- padding=1)
- self.sAtt_add_1 = nn.Conv2D(in_channels=self.nf,
- out_channels=self.nf,
- kernel_size=1,
- stride=1,
- padding=0)
- self.sAtt_add_2 = nn.Conv2D(in_channels=self.nf,
- out_channels=self.nf,
- kernel_size=1,
- stride=1,
- padding=0)
- self.sAtt_L1 = nn.Conv2D(in_channels=self.nf,
- out_channels=self.nf,
- kernel_size=1,
- stride=1,
- padding=0)
+ padding=0, )
+ self.sAtt_5 = nn.Conv2D(
+ in_channels=self.nf,
+ out_channels=self.nf,
+ kernel_size=3,
+ stride=1,
+ padding=1)
+ self.sAtt_add_1 = nn.Conv2D(
+ in_channels=self.nf,
+ out_channels=self.nf,
+ kernel_size=1,
+ stride=1,
+ padding=0)
+ self.sAtt_add_2 = nn.Conv2D(
+ in_channels=self.nf,
+ out_channels=self.nf,
+ kernel_size=1,
+ stride=1,
+ padding=0)
+ self.sAtt_L1 = nn.Conv2D(
+ in_channels=self.nf,
+ out_channels=self.nf,
+ kernel_size=1,
+ stride=1,
+ padding=0)
self.sAtt_L2 = nn.Conv2D(
in_channels=2 * self.nf,
out_channels=self.nf,
kernel_size=3,
stride=1,
- padding=1,
- )
- self.sAtt_L3 = nn.Conv2D(in_channels=self.nf,
- out_channels=self.nf,
- kernel_size=3,
- stride=1,
- padding=1)
- self.upsample = nn.Upsample(scale_factor=2,
- mode="bilinear",
- align_corners=False,
- align_mode=0)
+ padding=1, )
+ self.sAtt_L3 = nn.Conv2D(
+ in_channels=self.nf,
+ out_channels=self.nf,
+ kernel_size=3,
+ stride=1,
+ padding=1)
+ self.upsample = nn.Upsample(
+ scale_factor=2, mode="bilinear", align_corners=False, align_mode=0)
def forward(self, aligned_fea):
"""
@@ -347,6 +361,7 @@ class DCNPack(nn.Layer):
Ref:
Delving Deep into Deformable Alignment in Video Super-Resolution.
"""
+
def __init__(self,
num_filters=64,
kernel_size=3,
@@ -361,23 +376,24 @@ def __init__(self,
self.num_filters = num_filters
if isinstance(kernel_size, int):
self.kernel_size = [kernel_size, kernel_size]
- self.conv_offset_mask = nn.Conv2D(in_channels=self.num_filters,
- out_channels=self.deformable_groups *
- 3 * self.kernel_size[0] *
- self.kernel_size[1],
- kernel_size=self.kernel_size,
- stride=stride,
- padding=padding)
+ self.conv_offset_mask = nn.Conv2D(
+ in_channels=self.num_filters,
+ out_channels=self.deformable_groups * 3 * self.kernel_size[0] *
+ self.kernel_size[1],
+ kernel_size=self.kernel_size,
+ stride=stride,
+ padding=padding)
self.total_channels = self.deformable_groups * 3 * self.kernel_size[
0] * self.kernel_size[1]
self.split_channels = self.total_channels // 3
- self.dcn = DeformConv2D(in_channels=self.num_filters,
- out_channels=self.num_filters,
- kernel_size=self.kernel_size,
- stride=stride,
- padding=padding,
- dilation=dilation,
- deformable_groups=self.deformable_groups)
+ self.dcn = DeformConv2D(
+ in_channels=self.num_filters,
+ out_channels=self.num_filters,
+ kernel_size=self.kernel_size,
+ stride=stride,
+ padding=padding,
+ dilation=dilation,
+ deformable_groups=self.deformable_groups)
self.sigmoid = nn.Sigmoid()
# init conv offset
constant_init(self.conv_offset_mask, 0., 0.)
@@ -408,104 +424,103 @@ class PCDAlign(nn.Layer):
nf (int): Channel number of middle features. Default: 64.
 groups (int): Number of deformable groups. Default: 8.
"""
+
def __init__(self, nf=64, groups=8):
super(PCDAlign, self).__init__()
self.nf = nf
self.groups = groups
self.Leaky_relu = nn.LeakyReLU(negative_slope=0.1)
- self.upsample = nn.Upsample(scale_factor=2,
- mode="bilinear",
- align_corners=False,
- align_mode=0)
+ self.upsample = nn.Upsample(
+ scale_factor=2, mode="bilinear", align_corners=False, align_mode=0)
# Pyramid has three levels:
# L3: level 3, 1/4 spatial size
# L2: level 2, 1/2 spatial size
# L1: level 1, original spatial size
# L3
- self.PCD_Align_L3_offset_conv1 = nn.Conv2D(in_channels=nf * 2,
- out_channels=nf,
- kernel_size=3,
- stride=1,
- padding=1)
- self.PCD_Align_L3_offset_conv2 = nn.Conv2D(in_channels=nf,
- out_channels=nf,
- kernel_size=3,
- stride=1,
- padding=1)
- self.PCD_Align_L3_dcn = DCNPack(num_filters=nf,
- kernel_size=3,
- stride=1,
- padding=1,
- deformable_groups=groups)
+ self.PCD_Align_L3_offset_conv1 = nn.Conv2D(
+ in_channels=nf * 2,
+ out_channels=nf,
+ kernel_size=3,
+ stride=1,
+ padding=1)
+ self.PCD_Align_L3_offset_conv2 = nn.Conv2D(
+ in_channels=nf, out_channels=nf, kernel_size=3, stride=1, padding=1)
+ self.PCD_Align_L3_dcn = DCNPack(
+ num_filters=nf,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ deformable_groups=groups)
#L2
- self.PCD_Align_L2_offset_conv1 = nn.Conv2D(in_channels=nf * 2,
- out_channels=nf,
- kernel_size=3,
- stride=1,
- padding=1)
- self.PCD_Align_L2_offset_conv2 = nn.Conv2D(in_channels=nf * 2,
- out_channels=nf,
- kernel_size=3,
- stride=1,
- padding=1)
- self.PCD_Align_L2_offset_conv3 = nn.Conv2D(in_channels=nf,
- out_channels=nf,
- kernel_size=3,
- stride=1,
- padding=1)
- self.PCD_Align_L2_dcn = DCNPack(num_filters=nf,
- kernel_size=3,
- stride=1,
- padding=1,
- deformable_groups=groups)
- self.PCD_Align_L2_fea_conv = nn.Conv2D(in_channels=nf * 2,
- out_channels=nf,
- kernel_size=3,
- stride=1,
- padding=1)
+ self.PCD_Align_L2_offset_conv1 = nn.Conv2D(
+ in_channels=nf * 2,
+ out_channels=nf,
+ kernel_size=3,
+ stride=1,
+ padding=1)
+ self.PCD_Align_L2_offset_conv2 = nn.Conv2D(
+ in_channels=nf * 2,
+ out_channels=nf,
+ kernel_size=3,
+ stride=1,
+ padding=1)
+ self.PCD_Align_L2_offset_conv3 = nn.Conv2D(
+ in_channels=nf, out_channels=nf, kernel_size=3, stride=1, padding=1)
+ self.PCD_Align_L2_dcn = DCNPack(
+ num_filters=nf,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ deformable_groups=groups)
+ self.PCD_Align_L2_fea_conv = nn.Conv2D(
+ in_channels=nf * 2,
+ out_channels=nf,
+ kernel_size=3,
+ stride=1,
+ padding=1)
#L1
- self.PCD_Align_L1_offset_conv1 = nn.Conv2D(in_channels=nf * 2,
- out_channels=nf,
- kernel_size=3,
- stride=1,
- padding=1)
- self.PCD_Align_L1_offset_conv2 = nn.Conv2D(in_channels=nf * 2,
- out_channels=nf,
- kernel_size=3,
- stride=1,
- padding=1)
- self.PCD_Align_L1_offset_conv3 = nn.Conv2D(in_channels=nf,
- out_channels=nf,
- kernel_size=3,
- stride=1,
- padding=1)
- self.PCD_Align_L1_dcn = DCNPack(num_filters=nf,
- kernel_size=3,
- stride=1,
- padding=1,
- deformable_groups=groups)
- self.PCD_Align_L1_fea_conv = nn.Conv2D(in_channels=nf * 2,
- out_channels=nf,
- kernel_size=3,
- stride=1,
- padding=1)
+ self.PCD_Align_L1_offset_conv1 = nn.Conv2D(
+ in_channels=nf * 2,
+ out_channels=nf,
+ kernel_size=3,
+ stride=1,
+ padding=1)
+ self.PCD_Align_L1_offset_conv2 = nn.Conv2D(
+ in_channels=nf * 2,
+ out_channels=nf,
+ kernel_size=3,
+ stride=1,
+ padding=1)
+ self.PCD_Align_L1_offset_conv3 = nn.Conv2D(
+ in_channels=nf, out_channels=nf, kernel_size=3, stride=1, padding=1)
+ self.PCD_Align_L1_dcn = DCNPack(
+ num_filters=nf,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ deformable_groups=groups)
+ self.PCD_Align_L1_fea_conv = nn.Conv2D(
+ in_channels=nf * 2,
+ out_channels=nf,
+ kernel_size=3,
+ stride=1,
+ padding=1)
#cascade
- self.PCD_Align_cas_offset_conv1 = nn.Conv2D(in_channels=nf * 2,
- out_channels=nf,
- kernel_size=3,
- stride=1,
- padding=1)
- self.PCD_Align_cas_offset_conv2 = nn.Conv2D(in_channels=nf,
- out_channels=nf,
- kernel_size=3,
- stride=1,
- padding=1)
- self.PCD_Align_cascade_dcn = DCNPack(num_filters=nf,
- kernel_size=3,
- stride=1,
- padding=1,
- deformable_groups=groups)
+ self.PCD_Align_cas_offset_conv1 = nn.Conv2D(
+ in_channels=nf * 2,
+ out_channels=nf,
+ kernel_size=3,
+ stride=1,
+ padding=1)
+ self.PCD_Align_cas_offset_conv2 = nn.Conv2D(
+ in_channels=nf, out_channels=nf, kernel_size=3, stride=1, padding=1)
+ self.PCD_Align_cascade_dcn = DCNPack(
+ num_filters=nf,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ deformable_groups=groups)
def forward(self, nbr_fea_l, ref_fea_l):
"""Align neighboring frame features to the reference frame features.
@@ -594,6 +609,7 @@ class EDVRNet(nn.Layer):
 with_tsa (bool): Whether the model has a TSA module. Default: True.
 TSA_only (bool): Whether to use only the TSA module. Default: False.
"""
+
def __init__(self,
in_nf=3,
out_nf=3,
@@ -623,90 +639,94 @@ def __init__(self,
self.Leaky_relu = nn.LeakyReLU(negative_slope=0.1)
if self.predeblur:
- self.pre_deblur = PredeblurResNetPyramid(in_nf=self.in_nf,
- nf=self.nf,
- HR_in=self.HR_in)
- self.cov_1 = nn.Conv2D(in_channels=self.nf,
- out_channels=self.nf,
- kernel_size=1,
- stride=1)
+ self.pre_deblur = PredeblurResNetPyramid(
+ in_nf=self.in_nf, nf=self.nf, HR_in=self.HR_in)
+ self.cov_1 = nn.Conv2D(
+ in_channels=self.nf,
+ out_channels=self.nf,
+ kernel_size=1,
+ stride=1)
else:
- self.conv_first = nn.Conv2D(in_channels=self.in_nf,
- out_channels=self.nf,
- kernel_size=3,
- stride=1,
- padding=1)
+ self.conv_first = nn.Conv2D(
+ in_channels=self.in_nf,
+ out_channels=self.nf,
+ kernel_size=3,
+ stride=1,
+ padding=1)
#feature extraction module
self.feature_extractor = MakeMultiBlocks(ResidualBlockNoBN,
self.front_RBs, self.nf)
- self.fea_L2_conv1 = nn.Conv2D(in_channels=self.nf,
- out_channels=self.nf,
- kernel_size=3,
- stride=2,
- padding=1)
- self.fea_L2_conv2 = nn.Conv2D(in_channels=self.nf,
- out_channels=self.nf,
- kernel_size=3,
- stride=1,
- padding=1)
+ self.fea_L2_conv1 = nn.Conv2D(
+ in_channels=self.nf,
+ out_channels=self.nf,
+ kernel_size=3,
+ stride=2,
+ padding=1)
+ self.fea_L2_conv2 = nn.Conv2D(
+ in_channels=self.nf,
+ out_channels=self.nf,
+ kernel_size=3,
+ stride=1,
+ padding=1)
self.fea_L3_conv1 = nn.Conv2D(
in_channels=self.nf,
out_channels=self.nf,
kernel_size=3,
stride=2,
- padding=1,
- )
- self.fea_L3_conv2 = nn.Conv2D(in_channels=self.nf,
- out_channels=self.nf,
- kernel_size=3,
- stride=1,
- padding=1)
+ padding=1, )
+ self.fea_L3_conv2 = nn.Conv2D(
+ in_channels=self.nf,
+ out_channels=self.nf,
+ kernel_size=3,
+ stride=1,
+ padding=1)
#PCD alignment module
self.PCDModule = PCDAlign(nf=self.nf, groups=self.groups)
#TSA Fusion module
if self.w_TSA:
- self.TSAModule = TSAFusion(nf=self.nf,
- nframes=self.nframes,
- center=self.center)
+ self.TSAModule = TSAFusion(
+ nf=self.nf, nframes=self.nframes, center=self.center)
else:
- self.TSAModule = nn.Conv2D(in_channels=self.nframes * self.nf,
- out_channels=self.nf,
- kernel_size=1,
- stride=1)
+ self.TSAModule = nn.Conv2D(
+ in_channels=self.nframes * self.nf,
+ out_channels=self.nf,
+ kernel_size=1,
+ stride=1)
#reconstruction module
self.reconstructor = MakeMultiBlocks(ResidualBlockNoBN, self.back_RBs,
self.nf)
- self.upconv1 = nn.Conv2D(in_channels=self.nf,
- out_channels=4 * self.nf,
- kernel_size=3,
- stride=1,
- padding=1)
+ self.upconv1 = nn.Conv2D(
+ in_channels=self.nf,
+ out_channels=4 * self.nf,
+ kernel_size=3,
+ stride=1,
+ padding=1)
self.pixel_shuffle = nn.PixelShuffle(2)
- self.upconv2 = nn.Conv2D(in_channels=self.nf,
- out_channels=4 * 64,
- kernel_size=3,
- stride=1,
- padding=1)
- self.HRconv = nn.Conv2D(in_channels=64,
- out_channels=64,
- kernel_size=3,
- stride=1,
- padding=1)
- self.conv_last = nn.Conv2D(in_channels=64,
- out_channels=self.out_nf,
- kernel_size=3,
- stride=1,
- padding=1)
+ self.upconv2 = nn.Conv2D(
+ in_channels=self.nf,
+ out_channels=4 * 64,
+ kernel_size=3,
+ stride=1,
+ padding=1)
+ self.HRconv = nn.Conv2D(
+ in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1)
+ self.conv_last = nn.Conv2D(
+ in_channels=64,
+ out_channels=self.out_nf,
+ kernel_size=3,
+ stride=1,
+ padding=1)
if self.scale_factor == 4:
- self.upsample = nn.Upsample(scale_factor=self.scale_factor,
- mode="bilinear",
- align_corners=False,
- align_mode=0)
+ self.upsample = nn.Upsample(
+ scale_factor=self.scale_factor,
+ mode="bilinear",
+ align_corners=False,
+ align_mode=0)
def forward(self, x):
"""
@@ -753,8 +773,8 @@ def forward(self, x):
aligned_fea = []
for i in range(N):
nbr_fea_l = [
- L1_fea[:, i, :, :, :], L2_fea[:, i, :, :, :], L3_fea[:,
- i, :, :, :]
+ L1_fea[:, i, :, :, :], L2_fea[:, i, :, :, :],
+ L3_fea[:, i, :, :, :]
]
aligned_fea.append(self.PCDModule(nbr_fea_l, ref_fea_l))
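
For context on the DCNPack hunks above: the layer predicts per-position offsets and a modulation mask from its input (deformable_groups * 3 * K * K channels, split into thirds as in split_channels = total_channels // 3), then feeds both into a deformable convolution. A minimal sketch of that forward logic, assuming only Paddle's public paddle.vision.ops.DeformConv2D API; the class and variable names are illustrative, not the repo's:

import paddle
import paddle.nn as nn
import paddle.nn.functional as F
from paddle.vision.ops import DeformConv2D

class TinyDCN(nn.Layer):
    """Offset/mask-predicting deformable conv, mirroring the DCNPack idea."""

    def __init__(self, nf=64, groups=8, ks=3):
        super().__init__()
        # 3 * ks * ks channels per group: 2/3 are (dy, dx) offsets, 1/3 mask logits
        self.offset_mask = nn.Conv2D(nf, groups * 3 * ks * ks, ks, padding=ks // 2)
        self.dcn = DeformConv2D(nf, nf, ks, padding=ks // 2, deformable_groups=groups)

    def forward(self, x, extra_feat):
        om = self.offset_mask(extra_feat)
        o1, o2, mask = paddle.split(om, 3, axis=1)
        offset = paddle.concat([o1, o2], axis=1)  # 2 * groups * ks * ks channels
        return self.dcn(x, offset, mask=F.sigmoid(mask))

feat = paddle.randn([1, 64, 32, 32])
print(TinyDCN()(feat, feat).shape)  # [1, 64, 32, 32]

The sigmoid keeps mask values in (0, 1), matching the self.sigmoid member initialized in the hunk above.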
diff --git a/paddlers/models/ppgan/models/generators/generater_animegan.py b/paddlers/models/ppgan/models/generators/generater_animegan.py
index 2d3f3aa0..ca1cadb0 100644
--- a/paddlers/models/ppgan/models/generators/generater_animegan.py
+++ b/paddlers/models/ppgan/models/generators/generater_animegan.py
@@ -13,17 +13,18 @@ class Conv2DNormLReLU(nn.Layer):
def __init__(self,
in_channels: int,
out_channels: int,
- kernel_size: int = 3,
- stride: int = 1,
- padding: int = 1,
+ kernel_size: int=3,
+ stride: int=1,
+ padding: int=1,
bias_attr=False) -> None:
super().__init__()
- self.conv = nn.Conv2D(in_channels,
- out_channels,
- kernel_size,
- stride,
- padding,
- bias_attr=bias_attr)
+ self.conv = nn.Conv2D(
+ in_channels,
+ out_channels,
+ kernel_size,
+ stride,
+ padding,
+ bias_attr=bias_attr)
# NOTE layer norm is crucial for animegan!
self.norm = nn.GroupNorm(1, out_channels)
self.lrelu = nn.LeakyReLU(0.2)
@@ -39,9 +40,11 @@ class ResBlock(nn.Layer):
def __init__(self, in_channels: int, out_channels: int) -> None:
super().__init__()
self.body = nn.Sequential(
- Conv2DNormLReLU(in_channels, out_channels, 1, padding=0),
+ Conv2DNormLReLU(
+ in_channels, out_channels, 1, padding=0),
Conv2DNormLReLU(out_channels, out_channels, 3),
- nn.Conv2D(out_channels, out_channels // 2, 1, bias_attr=False))
+ nn.Conv2D(
+ out_channels, out_channels // 2, 1, bias_attr=False))
def forward(self, x0):
x = self.body(x0)
@@ -61,28 +64,30 @@ def __init__(self,
self.bottle_channels = round(self.expansion * self.in_channels)
self.body = nn.Sequential(
# pw
- Conv2DNormLReLU(self.in_channels,
- self.bottle_channels,
- kernel_size=1,
- bias_attr=bias_attr),
+ Conv2DNormLReLU(
+ self.in_channels,
+ self.bottle_channels,
+ kernel_size=1,
+ bias_attr=bias_attr),
# dw
- nn.Conv2D(self.bottle_channels,
- self.bottle_channels,
- kernel_size=3,
- stride=1,
- padding=0,
- groups=self.bottle_channels,
- bias_attr=True),
+ nn.Conv2D(
+ self.bottle_channels,
+ self.bottle_channels,
+ kernel_size=3,
+ stride=1,
+ padding=0,
+ groups=self.bottle_channels,
+ bias_attr=True),
nn.GroupNorm(1, self.bottle_channels),
nn.LeakyReLU(0.2),
# pw & linear
- nn.Conv2D(self.bottle_channels,
- self.out_channels,
- kernel_size=1,
- padding=0,
- bias_attr=False),
- nn.GroupNorm(1, self.out_channels),
- )
+ nn.Conv2D(
+ self.bottle_channels,
+ self.out_channels,
+ kernel_size=1,
+ padding=0,
+ bias_attr=False),
+ nn.GroupNorm(1, self.out_channels), )
def forward(self, x0):
x = self.body(x0)
@@ -97,26 +102,41 @@ def forward(self, x0):
class AnimeGeneratorLite(nn.Layer):
def __init__(self) -> None:
super().__init__()
- self.A = nn.Sequential(Conv2DNormLReLU(3, 32, 7, padding=3),
- Conv2DNormLReLU(32, 32, stride=2),
- Conv2DNormLReLU(32, 32))
-
- self.B = nn.Sequential(Conv2DNormLReLU(32, 64, stride=2),
- Conv2DNormLReLU(64, 64), Conv2DNormLReLU(64, 64))
-
- self.C = nn.Sequential(ResBlock(64, 128), ResBlock(64, 128),
- ResBlock(64, 128), ResBlock(64, 128))
-
- self.D = nn.Sequential(nn.Upsample(scale_factor=2, mode='bilinear'),
- Conv2DNormLReLU(64, 64), Conv2DNormLReLU(64, 64),
- Conv2DNormLReLU(64, 64))
-
- self.E = nn.Sequential(nn.Upsample(scale_factor=2, mode='bilinear'),
- Conv2DNormLReLU(64, 32), Conv2DNormLReLU(32, 32),
- Conv2DNormLReLU(32, 32, 7, padding=3))
-
- self.out = nn.Sequential(nn.Conv2D(32, 3, 1, bias_attr=False),
- nn.Tanh())
+ self.A = nn.Sequential(
+ Conv2DNormLReLU(
+ 3, 32, 7, padding=3),
+ Conv2DNormLReLU(
+ 32, 32, stride=2),
+ Conv2DNormLReLU(32, 32))
+
+ self.B = nn.Sequential(
+ Conv2DNormLReLU(
+ 32, 64, stride=2),
+ Conv2DNormLReLU(64, 64),
+ Conv2DNormLReLU(64, 64))
+
+ self.C = nn.Sequential(
+ ResBlock(64, 128),
+ ResBlock(64, 128), ResBlock(64, 128), ResBlock(64, 128))
+
+ self.D = nn.Sequential(
+ nn.Upsample(
+ scale_factor=2, mode='bilinear'),
+ Conv2DNormLReLU(64, 64),
+ Conv2DNormLReLU(64, 64),
+ Conv2DNormLReLU(64, 64))
+
+ self.E = nn.Sequential(
+ nn.Upsample(
+ scale_factor=2, mode='bilinear'),
+ Conv2DNormLReLU(64, 32),
+ Conv2DNormLReLU(32, 32),
+ Conv2DNormLReLU(
+ 32, 32, 7, padding=3))
+
+ self.out = nn.Sequential(
+ nn.Conv2D(
+ 32, 3, 1, bias_attr=False), nn.Tanh())
def forward(self, x):
x = self.A(x)
@@ -132,31 +152,42 @@ def forward(self, x):
class AnimeGenerator(nn.Layer):
def __init__(self) -> None:
super().__init__()
- self.A = nn.Sequential(Conv2DNormLReLU(3, 32, 7, padding=3),
- Conv2DNormLReLU(32, 64, stride=2),
- Conv2DNormLReLU(64, 64))
-
- self.B = nn.Sequential(Conv2DNormLReLU(64, 128, stride=2),
- Conv2DNormLReLU(128, 128),
- Conv2DNormLReLU(128, 128))
-
- self.C = nn.Sequential(InvertedresBlock(128, 2, 256),
- InvertedresBlock(256, 2, 256),
- InvertedresBlock(256, 2, 256),
- InvertedresBlock(256, 2, 256),
- Conv2DNormLReLU(256, 128))
-
- self.D = nn.Sequential(nn.Upsample(scale_factor=2, mode='bilinear'),
- Conv2DNormLReLU(128, 128),
- Conv2DNormLReLU(128, 128))
-
- self.E = nn.Sequential(nn.Upsample(scale_factor=2, mode='bilinear'),
- Conv2DNormLReLU(128, 64),
- Conv2DNormLReLU(64, 64),
- Conv2DNormLReLU(64, 32, 7, padding=3))
-
- self.out = nn.Sequential(nn.Conv2D(32, 3, 1, bias_attr=False),
- nn.Tanh())
+ self.A = nn.Sequential(
+ Conv2DNormLReLU(
+ 3, 32, 7, padding=3),
+ Conv2DNormLReLU(
+ 32, 64, stride=2),
+ Conv2DNormLReLU(64, 64))
+
+ self.B = nn.Sequential(
+ Conv2DNormLReLU(
+ 64, 128, stride=2),
+ Conv2DNormLReLU(128, 128),
+ Conv2DNormLReLU(128, 128))
+
+ self.C = nn.Sequential(
+ InvertedresBlock(128, 2, 256),
+ InvertedresBlock(256, 2, 256),
+ InvertedresBlock(256, 2, 256),
+ InvertedresBlock(256, 2, 256), Conv2DNormLReLU(256, 128))
+
+ self.D = nn.Sequential(
+ nn.Upsample(
+ scale_factor=2, mode='bilinear'),
+ Conv2DNormLReLU(128, 128),
+ Conv2DNormLReLU(128, 128))
+
+ self.E = nn.Sequential(
+ nn.Upsample(
+ scale_factor=2, mode='bilinear'),
+ Conv2DNormLReLU(128, 64),
+ Conv2DNormLReLU(64, 64),
+ Conv2DNormLReLU(
+ 64, 32, 7, padding=3))
+
+ self.out = nn.Sequential(
+ nn.Conv2D(
+ 32, 3, 1, bias_attr=False), nn.Tanh())
def forward(self, x):
x = self.A(x)
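
On the "layer norm is crucial for animegan" note above: nn.GroupNorm(1, C) with a single group normalizes each sample jointly over all channels and spatial positions, which is the layer-norm-like behavior Conv2DNormLReLU relies on. A quick sanity check (a sketch; only base Paddle assumed):

import paddle
import paddle.nn as nn

x = paddle.randn([2, 32, 8, 8])
gn = nn.GroupNorm(num_groups=1, num_channels=32)  # one group acts like layer norm over (C, H, W)
y = gn(x)
# each sample comes out with roughly zero mean and unit variance
print(float(y[0].mean()), float(y[0].var()))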
diff --git a/paddlers/models/ppgan/models/generators/generater_lapstyle.py b/paddlers/models/ppgan/models/generators/generater_lapstyle.py
index 20108d7c..31b5b544 100644
--- a/paddlers/models/ppgan/models/generators/generater_lapstyle.py
+++ b/paddlers/models/ppgan/models/generators/generater_lapstyle.py
@@ -73,8 +73,8 @@ def adaptive_instance_normalization(content_feat, style_feat):
style_mean, style_std = calc_mean_std(style_feat)
content_mean, content_std = calc_mean_std(content_feat)
- normalized_feat = (content_feat -
- content_mean.expand(size)) / content_std.expand(size)
+ normalized_feat = (
+ content_feat - content_mean.expand(size)) / content_std.expand(size)
return normalized_feat * style_std.expand(size) + style_mean.expand(size)
@@ -88,12 +88,17 @@ class ResnetBlock(nn.Layer):
Args:
dim (int): Channel number of intermediate features.
"""
+
def __init__(self, dim):
super(ResnetBlock, self).__init__()
- self.conv_block = nn.Sequential(nn.Pad2D([1, 1, 1, 1], mode='reflect'),
- nn.Conv2D(dim, dim, (3, 3)), nn.ReLU(),
- nn.Pad2D([1, 1, 1, 1], mode='reflect'),
- nn.Conv2D(dim, dim, (3, 3)))
+ self.conv_block = nn.Sequential(
+ nn.Pad2D(
+ [1, 1, 1, 1], mode='reflect'),
+ nn.Conv2D(dim, dim, (3, 3)),
+ nn.ReLU(),
+ nn.Pad2D(
+ [1, 1, 1, 1], mode='reflect'),
+ nn.Conv2D(dim, dim, (3, 3)))
def forward(self, x):
out = x + self.conv_block(x)
@@ -110,11 +115,14 @@ class ConvBlock(nn.Layer):
dim1 (int): Channel number of input features.
dim2 (int): Channel number of output features.
"""
+
def __init__(self, dim1, dim2):
super(ConvBlock, self).__init__()
- self.conv_block = nn.Sequential(nn.Pad2D([1, 1, 1, 1], mode='reflect'),
- nn.Conv2D(dim1, dim2, (3, 3)),
- nn.ReLU())
+ self.conv_block = nn.Sequential(
+ nn.Pad2D(
+ [1, 1, 1, 1], mode='reflect'),
+ nn.Conv2D(dim1, dim2, (3, 3)),
+ nn.ReLU())
def forward(self, x):
out = self.conv_block(x)
@@ -128,6 +136,7 @@ class DecoderNet(nn.Layer):
Drafting and Revision: Laplacian Pyramid Network for Fast High-Quality
Artistic Style Transfer.
"""
+
def __init__(self):
super(DecoderNet, self).__init__()
@@ -142,8 +151,9 @@ def __init__(self):
self.convblock_11 = ConvBlock(64, 64)
self.upsample = nn.Upsample(scale_factor=2, mode='nearest')
- self.final_conv = nn.Sequential(nn.Pad2D([1, 1, 1, 1], mode='reflect'),
- nn.Conv2D(64, 3, (3, 3)))
+ self.final_conv = nn.Sequential(
+ nn.Pad2D(
+ [1, 1, 1, 1], mode='reflect'), nn.Conv2D(64, 3, (3, 3)))
def forward(self, cF, sF):
@@ -167,8 +177,6 @@ def forward(self, cF, sF):
return out
-
-
@GENERATORS.register()
class Encoder(nn.Layer):
"""Encoder of Drafting module.
@@ -176,76 +184,97 @@ class Encoder(nn.Layer):
Drafting and Revision: Laplacian Pyramid Network for Fast High-Quality
Artistic Style Transfer.
"""
+
def __init__(self):
super(Encoder, self).__init__()
vgg_net = nn.Sequential(
nn.Conv2D(3, 3, (1, 1)),
- nn.Pad2D([1, 1, 1, 1], mode='reflect'),
+ nn.Pad2D(
+ [1, 1, 1, 1], mode='reflect'),
nn.Conv2D(3, 64, (3, 3)),
nn.ReLU(), # relu1-1
- nn.Pad2D([1, 1, 1, 1], mode='reflect'),
+ nn.Pad2D(
+ [1, 1, 1, 1], mode='reflect'),
nn.Conv2D(64, 64, (3, 3)),
nn.ReLU(), # relu1-2
- nn.MaxPool2D((2, 2), (2, 2), (0, 0), ceil_mode=True),
- nn.Pad2D([1, 1, 1, 1], mode='reflect'),
+ nn.MaxPool2D(
+ (2, 2), (2, 2), (0, 0), ceil_mode=True),
+ nn.Pad2D(
+ [1, 1, 1, 1], mode='reflect'),
nn.Conv2D(64, 128, (3, 3)),
nn.ReLU(), # relu2-1
- nn.Pad2D([1, 1, 1, 1], mode='reflect'),
+ nn.Pad2D(
+ [1, 1, 1, 1], mode='reflect'),
nn.Conv2D(128, 128, (3, 3)),
nn.ReLU(), # relu2-2
- nn.MaxPool2D((2, 2), (2, 2), (0, 0), ceil_mode=True),
- nn.Pad2D([1, 1, 1, 1], mode='reflect'),
+ nn.MaxPool2D(
+ (2, 2), (2, 2), (0, 0), ceil_mode=True),
+ nn.Pad2D(
+ [1, 1, 1, 1], mode='reflect'),
nn.Conv2D(128, 256, (3, 3)),
nn.ReLU(), # relu3-1
- nn.Pad2D([1, 1, 1, 1], mode='reflect'),
+ nn.Pad2D(
+ [1, 1, 1, 1], mode='reflect'),
nn.Conv2D(256, 256, (3, 3)),
nn.ReLU(), # relu3-2
- nn.Pad2D([1, 1, 1, 1], mode='reflect'),
+ nn.Pad2D(
+ [1, 1, 1, 1], mode='reflect'),
nn.Conv2D(256, 256, (3, 3)),
nn.ReLU(), # relu3-3
- nn.Pad2D([1, 1, 1, 1], mode='reflect'),
+ nn.Pad2D(
+ [1, 1, 1, 1], mode='reflect'),
nn.Conv2D(256, 256, (3, 3)),
nn.ReLU(), # relu3-4
- nn.MaxPool2D((2, 2), (2, 2), (0, 0), ceil_mode=True),
- nn.Pad2D([1, 1, 1, 1], mode='reflect'),
+ nn.MaxPool2D(
+ (2, 2), (2, 2), (0, 0), ceil_mode=True),
+ nn.Pad2D(
+ [1, 1, 1, 1], mode='reflect'),
nn.Conv2D(256, 512, (3, 3)),
nn.ReLU(), # relu4-1, this is the last layer used
- nn.Pad2D([1, 1, 1, 1], mode='reflect'),
+ nn.Pad2D(
+ [1, 1, 1, 1], mode='reflect'),
nn.Conv2D(512, 512, (3, 3)),
nn.ReLU(), # relu4-2
- nn.Pad2D([1, 1, 1, 1], mode='reflect'),
+ nn.Pad2D(
+ [1, 1, 1, 1], mode='reflect'),
nn.Conv2D(512, 512, (3, 3)),
nn.ReLU(), # relu4-3
- nn.Pad2D([1, 1, 1, 1], mode='reflect'),
+ nn.Pad2D(
+ [1, 1, 1, 1], mode='reflect'),
nn.Conv2D(512, 512, (3, 3)),
nn.ReLU(), # relu4-4
- nn.MaxPool2D((2, 2), (2, 2), (0, 0), ceil_mode=True),
- nn.Pad2D([1, 1, 1, 1], mode='reflect'),
+ nn.MaxPool2D(
+ (2, 2), (2, 2), (0, 0), ceil_mode=True),
+ nn.Pad2D(
+ [1, 1, 1, 1], mode='reflect'),
nn.Conv2D(512, 512, (3, 3)),
nn.ReLU(), # relu5-1
- nn.Pad2D([1, 1, 1, 1], mode='reflect'),
+ nn.Pad2D(
+ [1, 1, 1, 1], mode='reflect'),
nn.Conv2D(512, 512, (3, 3)),
nn.ReLU(), # relu5-2
- nn.Pad2D([1, 1, 1, 1], mode='reflect'),
+ nn.Pad2D(
+ [1, 1, 1, 1], mode='reflect'),
nn.Conv2D(512, 512, (3, 3)),
nn.ReLU(), # relu5-3
- nn.Pad2D([1, 1, 1, 1], mode='reflect'),
+ nn.Pad2D(
+ [1, 1, 1, 1], mode='reflect'),
nn.Conv2D(512, 512, (3, 3)),
nn.ReLU() # relu5-4
)
weight_path = get_path_from_url(
'https://paddlegan.bj.bcebos.com/models/vgg_normalised.pdparams')
vgg_net.set_dict(paddle.load(weight_path))
- self.enc_1 = nn.Sequential(*list(
- vgg_net.children())[:4]) # input -> relu1_1
- self.enc_2 = nn.Sequential(*list(
- vgg_net.children())[4:11]) # relu1_1 -> relu2_1
- self.enc_3 = nn.Sequential(*list(
- vgg_net.children())[11:18]) # relu2_1 -> relu3_1
- self.enc_4 = nn.Sequential(*list(
- vgg_net.children())[18:31]) # relu3_1 -> relu4_1
- self.enc_5 = nn.Sequential(*list(
- vgg_net.children())[31:44]) # relu4_1 -> relu5_1
+ self.enc_1 = nn.Sequential(
+ *list(vgg_net.children())[:4]) # input -> relu1_1
+ self.enc_2 = nn.Sequential(
+ *list(vgg_net.children())[4:11]) # relu1_1 -> relu2_1
+ self.enc_3 = nn.Sequential(
+ *list(vgg_net.children())[11:18]) # relu2_1 -> relu3_1
+ self.enc_4 = nn.Sequential(
+ *list(vgg_net.children())[18:31]) # relu3_1 -> relu4_1
+ self.enc_5 = nn.Sequential(
+ *list(vgg_net.children())[31:44]) # relu4_1 -> relu5_1
def forward(self, x):
out = {}
@@ -269,32 +298,33 @@ class RevisionNet(nn.Layer):
Drafting and Revision: Laplacian Pyramid Network for Fast High-Quality
Artistic Style Transfer.
"""
+
def __init__(self, input_nc=6):
super(RevisionNet, self).__init__()
DownBlock = []
DownBlock += [
- nn.Pad2D([1, 1, 1, 1], mode='reflect'),
- nn.Conv2D(input_nc, 64, (3, 3)),
+ nn.Pad2D(
+ [1, 1, 1, 1], mode='reflect'), nn.Conv2D(input_nc, 64, (3, 3)),
nn.ReLU()
]
DownBlock += [
- nn.Pad2D([1, 1, 1, 1], mode='reflect'),
- nn.Conv2D(64, 64, (3, 3), stride=2),
- nn.ReLU()
+ nn.Pad2D(
+ [1, 1, 1, 1], mode='reflect'), nn.Conv2D(
+ 64, 64, (3, 3), stride=2), nn.ReLU()
]
self.resblock = ResnetBlock(64)
UpBlock = []
UpBlock += [
- nn.Upsample(scale_factor=2, mode='nearest'),
- nn.Pad2D([1, 1, 1, 1], mode='reflect'),
- nn.Conv2D(64, 64, (3, 3)),
+ nn.Upsample(
+ scale_factor=2, mode='nearest'), nn.Pad2D(
+ [1, 1, 1, 1], mode='reflect'), nn.Conv2D(64, 64, (3, 3)),
nn.ReLU()
]
UpBlock += [
- nn.Pad2D([1, 1, 1, 1], mode='reflect'),
- nn.Conv2D(64, 3, (3, 3))
+ nn.Pad2D(
+ [1, 1, 1, 1], mode='reflect'), nn.Conv2D(64, 3, (3, 3))
]
self.DownBlock = nn.Sequential(*DownBlock)
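
The adaptive_instance_normalization hunk near the top of this file implements the usual AdaIN transform: normalize the content feature with its own per-channel statistics, then rescale and shift with the style feature's statistics. A self-contained sketch of the same computation (base Paddle only; eps is an assumed stabilizer, and the repo's calc_mean_std may differ in detail):

import paddle

def adain(content, style, eps=1e-5):
    # per-(N, C) mean/std over the spatial dimensions
    c_mean = content.mean(axis=[2, 3], keepdim=True)
    c_std = (content.var(axis=[2, 3], keepdim=True) + eps).sqrt()
    s_mean = style.mean(axis=[2, 3], keepdim=True)
    s_std = (style.var(axis=[2, 3], keepdim=True) + eps).sqrt()
    return (content - c_mean) / c_std * s_std + s_mean

c = paddle.randn([1, 64, 16, 16])
s = paddle.randn([1, 64, 16, 16])
print(adain(c, s).shape)  # [1, 64, 16, 16]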
diff --git a/paddlers/models/ppgan/models/generators/generater_photopen.py b/paddlers/models/ppgan/models/generators/generater_photopen.py
index ecbc9073..24bffc5c 100644
--- a/paddlers/models/ppgan/models/generators/generater_photopen.py
+++ b/paddlers/models/ppgan/models/generators/generater_photopen.py
@@ -21,6 +21,7 @@
from ...utils.photopen import build_norm_layer, simam, Dict
from .builder import GENERATORS
+
class SPADE(nn.Layer):
def __init__(self, config_text, norm_nc, label_nc):
super(SPADE, self).__init__()
@@ -57,6 +58,7 @@ def forward(self, x, segmap):
return out
+
class SPADEResnetBlock(nn.Layer):
def __init__(self, fin, fout, opt):
super(SPADEResnetBlock, self).__init__()
@@ -64,7 +66,7 @@ def __init__(self, fin, fout, opt):
# Attributes
self.learned_shortcut = (fin != fout)
fmiddle = min(fin, fout)
-
+
# define spade layers
spade_config_str = opt.norm_G.replace('spectral', '')
self.spade_0 = SPADE(spade_config_str, fin, opt.semantic_nc)
@@ -75,22 +77,22 @@ def __init__(self, fin, fout, opt):
# define act_conv layers
self.act_conv_0 = nn.Sequential(*[
nn.GELU(),
- spectral_norm(nn.Conv2D(fin, fmiddle, 3, 1, 1,
- weight_attr=None,
- bias_attr=None)),
- ])
+ spectral_norm(
+ nn.Conv2D(
+ fin, fmiddle, 3, 1, 1, weight_attr=None, bias_attr=None)),
+ ])
self.act_conv_1 = nn.Sequential(*[
nn.GELU(),
- spectral_norm(nn.Conv2D(fmiddle, fout, 3, 1, 1,
- weight_attr=None,
- bias_attr=None)),
- ])
+ spectral_norm(
+ nn.Conv2D(
+ fmiddle, fout, 3, 1, 1, weight_attr=None, bias_attr=None)),
+ ])
if self.learned_shortcut:
self.act_conv_s = nn.Sequential(*[
- spectral_norm(nn.Conv2D(fin, fout, 1, 1, 0, bias_attr=False,
- weight_attr=None)),
- ])
-
+ spectral_norm(
+ nn.Conv2D(
+ fin, fout, 1, 1, 0, bias_attr=False, weight_attr=None)),
+ ])
def forward(self, x, seg):
x_s = self.shortcut(x, seg)
@@ -107,32 +109,33 @@ def shortcut(self, x, seg):
x_s = x
return x_s
+
@GENERATORS.register()
class SPADEGenerator(nn.Layer):
- def __init__(self,
- ngf,
- num_upsampling_layers,
- crop_size,
- aspect_ratio,
- norm_G,
- semantic_nc,
- use_vae,
- nef,
- ):
+ def __init__(
+ self,
+ ngf,
+ num_upsampling_layers,
+ crop_size,
+ aspect_ratio,
+ norm_G,
+ semantic_nc,
+ use_vae,
+ nef, ):
super(SPADEGenerator, self).__init__()
-
+
opt = {
- 'ngf': ngf,
- 'num_upsampling_layers': num_upsampling_layers,
- 'crop_size': crop_size,
- 'aspect_ratio': aspect_ratio,
- 'norm_G': norm_G,
- 'semantic_nc': semantic_nc,
- 'use_vae': use_vae,
- 'nef': nef,
- }
+ 'ngf': ngf,
+ 'num_upsampling_layers': num_upsampling_layers,
+ 'crop_size': crop_size,
+ 'aspect_ratio': aspect_ratio,
+ 'norm_G': norm_G,
+ 'semantic_nc': semantic_nc,
+ 'use_vae': use_vae,
+ 'nef': nef,
+ }
self.opt = Dict(opt)
-
+
nf = self.opt.ngf
self.sw, self.sh = self.compute_latent_vector_size(self.opt)
@@ -213,7 +216,8 @@ def compute_latent_vector_size(self, opt):
sh = round(sw / opt.aspect_ratio)
return sw, sh
-
+
+
class VAE_Encoder(nn.Layer):
def __init__(self, opt):
super(VAE_Encoder, self).__init__()
@@ -224,31 +228,51 @@ def __init__(self, opt):
InstanceNorm = build_norm_layer('instance')
model = [
- spectral_norm(nn.Conv2D(3, ndf, kw, 2, pw,
- weight_attr=None,
- bias_attr=None)),
+ spectral_norm(
+ nn.Conv2D(
+ 3, ndf, kw, 2, pw, weight_attr=None, bias_attr=None)),
InstanceNorm(ndf),
-
nn.GELU(),
- spectral_norm(nn.Conv2D(ndf * 1, ndf * 2, kw, 2, pw,
+ spectral_norm(
+ nn.Conv2D(
+ ndf * 1,
+ ndf * 2,
+ kw,
+ 2,
+ pw,
weight_attr=None,
bias_attr=None)),
InstanceNorm(ndf * 2),
-
nn.GELU(),
- spectral_norm(nn.Conv2D(ndf * 2, ndf * 4, kw, 2, pw,
+ spectral_norm(
+ nn.Conv2D(
+ ndf * 2,
+ ndf * 4,
+ kw,
+ 2,
+ pw,
weight_attr=None,
bias_attr=None)),
InstanceNorm(ndf * 4),
-
nn.GELU(),
- spectral_norm(nn.Conv2D(ndf * 4, ndf * 8, kw, 2, pw,
+ spectral_norm(
+ nn.Conv2D(
+ ndf * 4,
+ ndf * 8,
+ kw,
+ 2,
+ pw,
weight_attr=None,
bias_attr=None)),
InstanceNorm(ndf * 8),
-
nn.GELU(),
- spectral_norm(nn.Conv2D(ndf * 8, ndf * 8, kw, 2, pw,
+ spectral_norm(
+ nn.Conv2D(
+ ndf * 8,
+ ndf * 8,
+ kw,
+ 2,
+ pw,
weight_attr=None,
bias_attr=None)),
InstanceNorm(ndf * 8),
@@ -256,12 +280,18 @@ def __init__(self, opt):
if opt.crop_size >= 256:
model += [
nn.GELU(),
- spectral_norm(nn.Conv2D(ndf * 8, ndf * 8, kw, 2, pw,
+ spectral_norm(
+ nn.Conv2D(
+ ndf * 8,
+ ndf * 8,
+ kw,
+ 2,
+ pw,
weight_attr=None,
bias_attr=None)),
InstanceNorm(ndf * 8),
]
- model += [nn.GELU(),]
+ model += [nn.GELU(), ]
self.flatten = nn.Flatten(1, -1)
self.so = 4
@@ -272,8 +302,7 @@ def __init__(self, opt):
def forward(self, x):
x = self.model(x)
-
+
x = self.flatten(x)
return self.fc_mu(x), self.fc_var(x)
-
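
For the SPADE blocks reformatted above: spatially-adaptive denormalization applies a parameter-free normalization, then modulates the result with gamma/beta maps predicted from the segmentation map resized to the feature resolution. A minimal sketch, assuming a plain BatchNorm backbone and illustrative layer sizes rather than the repo's norm_G config-string parsing:

import paddle
import paddle.nn as nn
import paddle.nn.functional as F

class TinySPADE(nn.Layer):
    def __init__(self, norm_nc, label_nc, hidden=128):
        super().__init__()
        # parameter-free norm: scale/shift come from the segmap instead
        self.norm = nn.BatchNorm2D(norm_nc, weight_attr=False, bias_attr=False)
        self.shared = nn.Sequential(
            nn.Conv2D(label_nc, hidden, 3, padding=1), nn.ReLU())
        self.gamma = nn.Conv2D(hidden, norm_nc, 3, padding=1)
        self.beta = nn.Conv2D(hidden, norm_nc, 3, padding=1)

    def forward(self, x, segmap):
        seg = F.interpolate(segmap, size=x.shape[2:], mode='nearest')
        h = self.shared(seg)
        return self.norm(x) * (1 + self.gamma(h)) + self.beta(h)

x = paddle.randn([4, 64, 32, 32])
seg = paddle.randn([4, 20, 256, 256])
print(TinySPADE(64, 20)(x, seg).shape)  # [4, 64, 32, 32]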
diff --git a/paddlers/models/ppgan/models/generators/generator_firstorder.py b/paddlers/models/ppgan/models/generators/generator_firstorder.py
index d89beb4e..76151b72 100644
--- a/paddlers/models/ppgan/models/generators/generator_firstorder.py
+++ b/paddlers/models/ppgan/models/generators/generator_firstorder.py
@@ -42,6 +42,7 @@ class FirstOrderGenerator(nn.Layer):
equivariance_value, equivariance_jacobian]
"""
+
def __init__(self, generator_cfg, kp_detector_cfg, common_params,
train_params, dis_scales):
super(FirstOrderGenerator, self).__init__()
@@ -59,9 +60,8 @@ def __init__(self, generator_cfg, kp_detector_cfg, common_params,
def forward(self, x, discriminator, kp_extractor_ori=None):
kp_source = self.kp_extractor(x['source'])
kp_driving = self.kp_extractor(x['driving'])
- generated = self.generator(x['source'],
- kp_source=kp_source,
- kp_driving=kp_driving)
+ generated = self.generator(
+ x['source'], kp_source=kp_source, kp_driving=kp_driving)
generated.update({'kp_source': kp_source, 'kp_driving': kp_driving})
loss_values = {}
@@ -84,8 +84,8 @@ def forward(self, x, discriminator, kp_extractor_ori=None):
if self.loss_weights['generator_gan'] != 0:
discriminator_maps_generated = discriminator(
pyramide_generated, kp=detach_kp(kp_driving))
- discriminator_maps_real = discriminator(pyramide_real,
- kp=detach_kp(kp_driving))
+ discriminator_maps_real = discriminator(
+ pyramide_real, kp=detach_kp(kp_driving))
value_total = 0
for scale in self.disc_scales:
key = 'prediction_map_%s' % scale
@@ -118,17 +118,17 @@ def forward(self, x, discriminator, kp_extractor_ori=None):
# Value loss part
if self.loss_weights['equivariance_value'] != 0:
- value = paddle.abs(
- kp_driving['value'] -
- transform.warp_coordinates(transformed_kp['value'])).mean()
+ value = paddle.abs(kp_driving['value'] -
+ transform.warp_coordinates(transformed_kp[
+ 'value'])).mean()
loss_values['equivariance_value'] = self.loss_weights[
'equivariance_value'] * value
# jacobian loss part
if self.loss_weights['equivariance_jacobian'] != 0:
- jacobian_transformed = paddle.matmul(
- *broadcast(transform.jacobian(transformed_kp['value']),
- transformed_kp['jacobian']))
+ jacobian_transformed = paddle.matmul(*broadcast(
+ transform.jacobian(transformed_kp['value']),
+ transformed_kp['jacobian']))
normed_driving = paddle.inverse(kp_driving['jacobian'])
normed_transformed = jacobian_transformed
value = paddle.matmul(
@@ -159,6 +159,7 @@ class VGG19(nn.Layer):
"""
Vgg19 network for perceptual loss. See Sec 3.3.
"""
+
def __init__(self, requires_grad=False):
super(VGG19, self).__init__()
pretrained_url = 'https://paddlegan.bj.bcebos.com/models/vgg19.pdparams'
@@ -209,10 +210,10 @@ class Transform:
"""
Random tps transformation for equivariance constraints. See Sec 3.3
"""
+
def __init__(self, bs, **kwargs):
- noise = paddle.distribution.Normal(loc=[0],
- scale=[kwargs['sigma_affine']
- ]).sample([bs, 2, 3])
+ noise = paddle.distribution.Normal(
+ loc=[0], scale=[kwargs['sigma_affine']]).sample([bs, 2, 3])
noise = noise.reshape((bs, 2, 3))
self.theta = noise + paddle.tensor.eye(2, 3, dtype='float32').reshape(
(1, 2, 3))
@@ -234,11 +235,12 @@ def transform_frame(self, frame):
grid = grid.reshape((1, frame.shape[2] * frame.shape[3], 2))
grid = self.warp_coordinates(grid).reshape(
(self.bs, frame.shape[2], frame.shape[3], 2))
- return F.grid_sample(frame,
- grid,
- mode='bilinear',
- padding_mode='reflection',
- align_corners=True)
+ return F.grid_sample(
+ frame,
+ grid,
+ mode='bilinear',
+ padding_mode='reflection',
+ align_corners=True)
def warp_coordinates(self, coordinates):
theta = self.theta.astype('float32')
@@ -251,8 +253,8 @@ def warp_coordinates(self, coordinates):
theta_part_a = theta[:, :, :, :2]
theta_part_b = theta[:, :, :, 2:]
- transformed = paddle.fluid.layers.matmul(
- *broadcast(theta_part_a, coordinates)) + theta_part_b #M*p + m0
+ transformed = paddle.fluid.layers.matmul(*broadcast(
+ theta_part_a, coordinates)) + theta_part_b #M*p + m0
transformed = transformed.squeeze(-1)
if self.tps:
control_points = self.control_points.astype('float32')
@@ -272,12 +274,10 @@ def warp_coordinates(self, coordinates):
def jacobian(self, coordinates):
new_coordinates = self.warp_coordinates(coordinates)
assert len(new_coordinates.shape) == 3
- grad_x = paddle.grad(new_coordinates[:, :, 0].sum(),
- coordinates,
- create_graph=True)
- grad_y = paddle.grad(new_coordinates[:, :, 1].sum(),
- coordinates,
- create_graph=True)
+ grad_x = paddle.grad(
+ new_coordinates[:, :, 0].sum(), coordinates, create_graph=True)
+ grad_y = paddle.grad(
+ new_coordinates[:, :, 1].sum(), coordinates, create_graph=True)
jacobian = paddle.concat(
[grad_x[0].unsqueeze(-2), grad_y[0].unsqueeze(-2)], axis=-2)
return jacobian
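
The jacobian hunk above computes the warp's Jacobian by reverse-mode autodiff: differentiate each output coordinate channel with respect to the input coordinates and stack the two gradients. The same trick on a bare affine warp (an illustrative sketch, not the repo's Transform class):

import paddle

def warp(coords, theta):
    # coords: [B, N, 2]; theta: [B, 2, 3] affine map p -> A @ p + b
    return paddle.matmul(
        coords, theta[:, :, :2], transpose_y=True) + theta[:, :, 2].unsqueeze(1)

coords = paddle.randn([1, 5, 2])
coords.stop_gradient = False
theta = paddle.randn([1, 2, 3])
out = warp(coords, theta)
# row i of the Jacobian is the gradient of output coordinate i
gx = paddle.grad(out[:, :, 0].sum(), coords, create_graph=True)[0]
gy = paddle.grad(out[:, :, 1].sum(), coords, create_graph=True)[0]
jac = paddle.concat([gx.unsqueeze(-2), gy.unsqueeze(-2)], axis=-2)
print(jac.shape)  # [1, 5, 2, 2]; for an affine warp every 2x2 block equals A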
diff --git a/paddlers/models/ppgan/models/generators/generator_starganv2.py b/paddlers/models/ppgan/models/generators/generator_starganv2.py
index 7d39de59..86b28c4d 100644
--- a/paddlers/models/ppgan/models/generators/generator_starganv2.py
+++ b/paddlers/models/ppgan/models/generators/generator_starganv2.py
@@ -21,6 +21,7 @@ class AvgPool2D(nn.Layer):
 Replace avg_pool2d because paddle.grad causes avg_pool2d to report an error during training.
 In the future the Paddle framework will support avg_pool2d and this class will be removed.
"""
+
def __init__(self):
super(AvgPool2D, self).__init__()
self.filter = paddle.to_tensor([[1, 1], [1, 1]], dtype='float32')
@@ -49,12 +50,10 @@ def _build_weights(self, dim_in, dim_out):
self.conv1 = nn.Conv2D(dim_in, dim_in, 3, 1, 1)
self.conv2 = nn.Conv2D(dim_in, dim_out, 3, 1, 1)
if self.normalize:
- self.norm1 = nn.InstanceNorm2D(dim_in,
- weight_attr=True,
- bias_attr=True)
- self.norm2 = nn.InstanceNorm2D(dim_in,
- weight_attr=True,
- bias_attr=True)
+ self.norm1 = nn.InstanceNorm2D(
+ dim_in, weight_attr=True, bias_attr=True)
+ self.norm2 = nn.InstanceNorm2D(
+ dim_in, weight_attr=True, bias_attr=True)
if self.learned_sc:
self.conv1x1 = nn.Conv2D(dim_in, dim_out, 1, 1, 0, bias_attr=False)
@@ -86,9 +85,8 @@ def forward(self, x):
class AdaIN(nn.Layer):
def __init__(self, style_dim, num_features):
super().__init__()
- self.norm = nn.InstanceNorm2D(num_features,
- weight_attr=False,
- bias_attr=False)
+ self.norm = nn.InstanceNorm2D(
+ num_features, weight_attr=False, bias_attr=False)
self.fc = nn.Linear(style_dim, num_features * 2)
def forward(self, x, s):
@@ -170,8 +168,10 @@ def __init__(self, img_size=256, style_dim=64, max_conv_dim=512, w_hpf=1):
self.encode = nn.LayerList()
self.decode = nn.LayerList()
self.to_rgb = nn.Sequential(
- nn.InstanceNorm2D(dim_in, weight_attr=True, bias_attr=True),
- nn.LeakyReLU(0.2), nn.Conv2D(dim_in, 3, 1, 1, 0))
+ nn.InstanceNorm2D(
+ dim_in, weight_attr=True, bias_attr=True),
+ nn.LeakyReLU(0.2),
+ nn.Conv2D(dim_in, 3, 1, 1, 0))
# down/up-sampling blocks
repeat_num = int(np.log2(img_size)) - 4
@@ -180,28 +180,26 @@ def __init__(self, img_size=256, style_dim=64, max_conv_dim=512, w_hpf=1):
for _ in range(repeat_num):
dim_out = min(dim_in * 2, max_conv_dim)
self.encode.append(
- ResBlk(dim_in, dim_out, normalize=True, downsample=True))
+ ResBlk(
+ dim_in, dim_out, normalize=True, downsample=True))
if len(self.decode) == 0:
self.decode.append(
- AdainResBlk(dim_out,
- dim_in,
- style_dim,
- w_hpf=w_hpf,
- upsample=True))
+ AdainResBlk(
+ dim_out, dim_in, style_dim, w_hpf=w_hpf, upsample=True))
else:
- self.decode.insert(0,
- AdainResBlk(dim_out,
- dim_in,
- style_dim,
- w_hpf=w_hpf,
- upsample=True)) # stack-like
+ self.decode.insert(
+ 0,
+ AdainResBlk(
+ dim_out, dim_in, style_dim, w_hpf=w_hpf,
+ upsample=True)) # stack-like
dim_in = dim_out
# bottleneck blocks
for _ in range(2):
self.encode.append(ResBlk(dim_out, dim_out, normalize=True))
self.decode.insert(
- 0, AdainResBlk(dim_out, dim_out, style_dim, w_hpf=w_hpf))
+ 0, AdainResBlk(
+ dim_out, dim_out, style_dim, w_hpf=w_hpf))
if w_hpf > 0:
self.hpf = HighPass(w_hpf)
@@ -217,9 +215,8 @@ def forward(self, x, s, masks=None):
x = block(x, s)
if (masks is not None) and (x.shape[2] in [32, 64, 128]):
mask = masks[0] if x.shape[2] in [32] else masks[1]
- mask = F.interpolate(mask,
- size=[x.shape[2], x.shape[2]],
- mode='bilinear')
+ mask = F.interpolate(
+ mask, size=[x.shape[2], x.shape[2]], mode='bilinear')
x = x + self.hpf(mask * cache[x.shape[2]])
return self.to_rgb(x)
@@ -239,10 +236,12 @@ def __init__(self, latent_dim=16, style_dim=64, num_domains=2):
self.unshared = nn.LayerList()
for _ in range(num_domains):
self.unshared.append(
- nn.Sequential(nn.Linear(512, 512),
- nn.ReLU(), nn.Linear(512, 512), nn.ReLU(),
- nn.Linear(512, 512), nn.ReLU(),
- nn.Linear(512, style_dim)))
+ nn.Sequential(
+ nn.Linear(512, 512),
+ nn.ReLU(),
+ nn.Linear(512, 512),
+ nn.ReLU(),
+ nn.Linear(512, 512), nn.ReLU(), nn.Linear(512, style_dim)))
def forward(self, z, y):
h = self.shared(z)
@@ -254,8 +253,8 @@ def forward(self, z, y):
s = []
for i in range(idx.shape[0]):
s += [
- out[idx[i].numpy().astype(np.int).tolist()[0],
- y[i].numpy().astype(np.int).tolist()[0]]
+ out[idx[i].numpy().astype(np.int).tolist()[0], y[i].numpy()
+ .astype(np.int).tolist()[0]]
]
s = paddle.stack(s)
s = paddle.reshape(s, (s.shape[0], -1))
@@ -300,8 +299,8 @@ def forward(self, x, y):
s = []
for i in range(idx.shape[0]):
s += [
- out[idx[i].numpy().astype(np.int).tolist()[0],
- y[i].numpy().astype(np.int).tolist()[0]]
+ out[idx[i].numpy().astype(np.int).tolist()[0], y[i].numpy()
+ .astype(np.int).tolist()[0]]
]
s = paddle.stack(s)
s = paddle.reshape(s, (s.shape[0], -1))
@@ -320,15 +319,16 @@ def __init__(self,
self.end_relu = end_relu
# Base part
- self.conv1 = CoordConvTh(256,
- 256,
- True,
- False,
- in_channels=3,
- out_channels=64,
- kernel_size=7,
- stride=2,
- padding=3)
+ self.conv1 = CoordConvTh(
+ 256,
+ 256,
+ True,
+ False,
+ in_channels=3,
+ out_channels=64,
+ kernel_size=7,
+ stride=2,
+ padding=3)
self.bn1 = nn.BatchNorm2D(64)
self.conv2 = ConvBlock(64, 128)
self.conv3 = ConvBlock(128, 128)
@@ -395,9 +395,10 @@ def get_heatmap(self, x, b_preprocess=True):
heatmaps = outputs[-1][:, :-1, :, :]
scale_factor = x.shape[2] // heatmaps.shape[2]
if b_preprocess:
- heatmaps = F.interpolate(heatmaps,
- scale_factor=scale_factor,
- mode='bilinear',
- align_corners=True)
+ heatmaps = F.interpolate(
+ heatmaps,
+ scale_factor=scale_factor,
+ mode='bilinear',
+ align_corners=True)
heatmaps = preprocess(heatmaps)
return heatmaps
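
One caveat in the style-selection hunks above: astype(np.int) relies on the deprecated NumPy alias (removed in NumPy 1.24), so those lines fail on current NumPy; int or np.int64 is the safe spelling. The per-sample domain lookup can also stay on-device without the numpy round-trip, e.g. (a sketch under that assumption):

import paddle

out = paddle.randn([4, 2, 64])  # [batch, num_domains, style_dim]
y = paddle.to_tensor([0, 1, 1, 0], dtype='int64')  # target domain per sample

# select out[i, y[i]] for every i via gather_nd instead of numpy indexing
idx = paddle.stack([paddle.arange(4, dtype='int64'), y], axis=1)  # [4, 2]
s = paddle.gather_nd(out, idx)
print(s.shape)  # [4, 64]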
diff --git a/paddlers/models/ppgan/models/generators/iconvsr.py b/paddlers/models/ppgan/models/generators/iconvsr.py
index f97931be..4b7b57b4 100644
--- a/paddlers/models/ppgan/models/generators/iconvsr.py
+++ b/paddlers/models/ppgan/models/generators/iconvsr.py
@@ -45,6 +45,7 @@ class IconVSR(nn.Layer):
 then the (0, 5, 10, 15, ...)-th frames will be the keyframes.
Default: 5.
"""
+
def __init__(self,
mid_channels=64,
num_blocks=30,
@@ -64,25 +65,17 @@ def __init__(self,
self.spynet.set_state_dict(paddle.load(weight_path))
# information-refill
- self.edvr = EDVRFeatureExtractor(num_frames=padding * 2 + 1,
- center_frame_idx=padding)
+ self.edvr = EDVRFeatureExtractor(
+ num_frames=padding * 2 + 1, center_frame_idx=padding)
edvr_wight_path = get_path_from_url(
'https://paddlegan.bj.bcebos.com/models/edvrm.pdparams')
self.edvr.set_state_dict(paddle.load(edvr_wight_path))
- self.backward_fusion = nn.Conv2D(2 * mid_channels,
- mid_channels,
- 3,
- 1,
- 1,
- bias_attr=True)
- self.forward_fusion = nn.Conv2D(2 * mid_channels,
- mid_channels,
- 3,
- 1,
- 1,
- bias_attr=True)
+ self.backward_fusion = nn.Conv2D(
+ 2 * mid_channels, mid_channels, 3, 1, 1, bias_attr=True)
+ self.forward_fusion = nn.Conv2D(
+ 2 * mid_channels, mid_channels, 3, 1, 1, bias_attr=True)
# propagation branches
self.backward_resblocks = ResidualBlocksWithInputConv(
@@ -92,19 +85,14 @@ def __init__(self,
# upsample
# self.fusion = nn.Conv2D(mid_channels * 2, mid_channels, 1, 1, 0)
- self.upsample1 = PixelShufflePack(mid_channels,
- mid_channels,
- 2,
- upsample_kernel=3)
- self.upsample2 = PixelShufflePack(mid_channels,
- 64,
- 2,
- upsample_kernel=3)
+ self.upsample1 = PixelShufflePack(
+ mid_channels, mid_channels, 2, upsample_kernel=3)
+ self.upsample2 = PixelShufflePack(
+ mid_channels, 64, 2, upsample_kernel=3)
self.conv_hr = nn.Conv2D(64, 64, 3, 1, 1)
self.conv_last = nn.Conv2D(64, 3, 3, 1, 1)
- self.img_upsample = nn.Upsample(scale_factor=4,
- mode='bilinear',
- align_corners=False)
+ self.img_upsample = nn.Upsample(
+ scale_factor=4, mode='bilinear', align_corners=False)
# activation function
self.lrelu = nn.LeakyReLU(negative_slope=0.1)
@@ -319,6 +307,7 @@ class EDVRFeatureExtractor(nn.Layer):
0. Default: 2.
 with_tsa (bool): Whether to use the TSA module. Default: True.
"""
+
def __init__(self,
in_channels=3,
out_channel=3,
@@ -336,9 +325,8 @@ def __init__(self,
self.with_tsa = with_tsa
self.conv_first = nn.Conv2D(in_channels, mid_channels, 3, 1, 1)
- self.feature_extraction = make_layer(ResidualBlockNoBN,
- num_blocks_extraction,
- nf=mid_channels)
+ self.feature_extraction = make_layer(
+ ResidualBlockNoBN, num_blocks_extraction, nf=mid_channels)
# generate pyramid features
self.feat_l2_conv1 = nn.Conv2D(mid_channels, mid_channels, 3, 2, 1)
@@ -350,9 +338,10 @@ def __init__(self,
self.pcd_alignment = PCDAlign(nf=mid_channels, groups=deform_groups)
# fusion
if self.with_tsa:
- self.fusion = TSAFusion(nf=mid_channels,
- nframes=num_frames,
- center=self.center_frame_idx)
+ self.fusion = TSAFusion(
+ nf=mid_channels,
+ nframes=num_frames,
+ center=self.center_frame_idx)
else:
self.fusion = nn.Conv2D(num_frames * mid_channels, mid_channels, 1,
1)
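
On the backward_fusion/forward_fusion hunks above: IconVSR's information-refill concatenates the propagated recurrent features with keyframe features from the pretrained EDVR extractor and fuses them back down to mid_channels with a 3x3 conv. In shape terms (a sketch, not the repo's forward pass):

import paddle
import paddle.nn as nn

mid = 64
fusion = nn.Conv2D(2 * mid, mid, 3, 1, 1, bias_attr=True)
prop = paddle.randn([1, mid, 64, 64])    # propagated recurrent feature
refill = paddle.randn([1, mid, 64, 64])  # EDVR keyframe feature
print(fusion(paddle.concat([prop, refill], axis=1)).shape)  # [1, 64, 64, 64]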
diff --git a/paddlers/models/ppgan/models/generators/lesrcnn.py b/paddlers/models/ppgan/models/generators/lesrcnn.py
index 7bb2a4e9..890bc567 100644
--- a/paddlers/models/ppgan/models/generators/lesrcnn.py
+++ b/paddlers/models/ppgan/models/generators/lesrcnn.py
@@ -63,12 +63,14 @@ def __init__(self, n_channels, scale, group=1):
if scale == 2 or scale == 4 or scale == 8:
for _ in range(int(math.log(scale, 2))):
modules += [
- nn.Conv2D(n_channels, 4 * n_channels, 3, 1, 1, groups=group)
+ nn.Conv2D(
+ n_channels, 4 * n_channels, 3, 1, 1, groups=group)
]
modules += [nn.PixelShuffle(2)]
elif scale == 3:
modules += [
- nn.Conv2D(n_channels, 9 * n_channels, 3, 1, 1, groups=group)
+ nn.Conv2D(
+ n_channels, 9 * n_channels, 3, 1, 1, groups=group)
]
modules += [nn.PixelShuffle(3)]
@@ -89,12 +91,12 @@ class LESRCNNGenerator(nn.Layer):
 multi_scale (bool): Whether to train a multi-scale model.
 group (int): Group option for the convolutions.
"""
+
def __init__(
- self,
- scale=4,
- multi_scale=False,
- group=1,
- ):
+ self,
+ scale=4,
+ multi_scale=False,
+ group=1, ):
super(LESRCNNGenerator, self).__init__()
kernel_size = 3
@@ -110,165 +112,197 @@ def __init__(
self.add_mean = MeanShift((0.4488, 0.4371, 0.4040), sub=False)
self.conv1 = nn.Sequential(
- nn.Conv2D(in_channels=channels,
- out_channels=features,
- kernel_size=kernel_size,
- padding=padding,
- groups=1,
- bias_attr=False))
+ nn.Conv2D(
+ in_channels=channels,
+ out_channels=features,
+ kernel_size=kernel_size,
+ padding=padding,
+ groups=1,
+ bias_attr=False))
self.conv2 = nn.Sequential(
- nn.Conv2D(in_channels=features,
- out_channels=features,
- kernel_size=kernel_size,
- padding=1,
- groups=1,
- bias_attr=False), nn.ReLU())
+ nn.Conv2D(
+ in_channels=features,
+ out_channels=features,
+ kernel_size=kernel_size,
+ padding=1,
+ groups=1,
+ bias_attr=False),
+ nn.ReLU())
self.conv3 = nn.Sequential(
- nn.Conv2D(in_channels=features,
- out_channels=features,
- kernel_size=kernel_size1,
- padding=0,
- groups=groups,
- bias_attr=False))
+ nn.Conv2D(
+ in_channels=features,
+ out_channels=features,
+ kernel_size=kernel_size1,
+ padding=0,
+ groups=groups,
+ bias_attr=False))
self.conv4 = nn.Sequential(
- nn.Conv2D(in_channels=features,
- out_channels=features,
- kernel_size=kernel_size,
- padding=1,
- groups=1,
- bias_attr=False), nn.ReLU())
+ nn.Conv2D(
+ in_channels=features,
+ out_channels=features,
+ kernel_size=kernel_size,
+ padding=1,
+ groups=1,
+ bias_attr=False),
+ nn.ReLU())
self.conv5 = nn.Sequential(
- nn.Conv2D(in_channels=features,
- out_channels=features,
- kernel_size=kernel_size1,
- padding=0,
- groups=groups,
- bias_attr=False))
+ nn.Conv2D(
+ in_channels=features,
+ out_channels=features,
+ kernel_size=kernel_size1,
+ padding=0,
+ groups=groups,
+ bias_attr=False))
self.conv6 = nn.Sequential(
- nn.Conv2D(in_channels=features,
- out_channels=features,
- kernel_size=kernel_size,
- padding=1,
- groups=1,
- bias_attr=False), nn.ReLU())
+ nn.Conv2D(
+ in_channels=features,
+ out_channels=features,
+ kernel_size=kernel_size,
+ padding=1,
+ groups=1,
+ bias_attr=False),
+ nn.ReLU())
self.conv7 = nn.Sequential(
- nn.Conv2D(in_channels=features,
- out_channels=features,
- kernel_size=kernel_size1,
- padding=0,
- groups=groups,
- bias_attr=False))
+ nn.Conv2D(
+ in_channels=features,
+ out_channels=features,
+ kernel_size=kernel_size1,
+ padding=0,
+ groups=groups,
+ bias_attr=False))
self.conv8 = nn.Sequential(
- nn.Conv2D(in_channels=features,
- out_channels=features,
- kernel_size=kernel_size,
- padding=1,
- groups=1,
- bias_attr=False), nn.ReLU())
+ nn.Conv2D(
+ in_channels=features,
+ out_channels=features,
+ kernel_size=kernel_size,
+ padding=1,
+ groups=1,
+ bias_attr=False),
+ nn.ReLU())
self.conv9 = nn.Sequential(
- nn.Conv2D(in_channels=features,
- out_channels=features,
- kernel_size=kernel_size1,
- padding=0,
- groups=groups,
- bias_attr=False))
+ nn.Conv2D(
+ in_channels=features,
+ out_channels=features,
+ kernel_size=kernel_size1,
+ padding=0,
+ groups=groups,
+ bias_attr=False))
self.conv10 = nn.Sequential(
- nn.Conv2D(in_channels=features,
- out_channels=features,
- kernel_size=kernel_size,
- padding=1,
- groups=1,
- bias_attr=False), nn.ReLU())
+ nn.Conv2D(
+ in_channels=features,
+ out_channels=features,
+ kernel_size=kernel_size,
+ padding=1,
+ groups=1,
+ bias_attr=False),
+ nn.ReLU())
self.conv11 = nn.Sequential(
- nn.Conv2D(in_channels=features,
- out_channels=features,
- kernel_size=kernel_size1,
- padding=0,
- groups=groups,
- bias_attr=False))
+ nn.Conv2D(
+ in_channels=features,
+ out_channels=features,
+ kernel_size=kernel_size1,
+ padding=0,
+ groups=groups,
+ bias_attr=False))
self.conv12 = nn.Sequential(
- nn.Conv2D(in_channels=features,
- out_channels=features,
- kernel_size=kernel_size,
- padding=1,
- groups=1,
- bias_attr=False), nn.ReLU())
+ nn.Conv2D(
+ in_channels=features,
+ out_channels=features,
+ kernel_size=kernel_size,
+ padding=1,
+ groups=1,
+ bias_attr=False),
+ nn.ReLU())
self.conv13 = nn.Sequential(
- nn.Conv2D(in_channels=features,
- out_channels=features,
- kernel_size=kernel_size1,
- padding=0,
- groups=groups,
- bias_attr=False))
+ nn.Conv2D(
+ in_channels=features,
+ out_channels=features,
+ kernel_size=kernel_size1,
+ padding=0,
+ groups=groups,
+ bias_attr=False))
self.conv14 = nn.Sequential(
- nn.Conv2D(in_channels=features,
- out_channels=features,
- kernel_size=kernel_size,
- padding=1,
- groups=1,
- bias_attr=False), nn.ReLU())
+ nn.Conv2D(
+ in_channels=features,
+ out_channels=features,
+ kernel_size=kernel_size,
+ padding=1,
+ groups=1,
+ bias_attr=False),
+ nn.ReLU())
self.conv15 = nn.Sequential(
- nn.Conv2D(in_channels=features,
- out_channels=features,
- kernel_size=kernel_size1,
- padding=0,
- groups=groups,
- bias_attr=False))
+ nn.Conv2D(
+ in_channels=features,
+ out_channels=features,
+ kernel_size=kernel_size1,
+ padding=0,
+ groups=groups,
+ bias_attr=False))
self.conv16 = nn.Sequential(
- nn.Conv2D(in_channels=features,
- out_channels=features,
- kernel_size=kernel_size,
- padding=1,
- groups=1,
- bias_attr=False), nn.ReLU())
+ nn.Conv2D(
+ in_channels=features,
+ out_channels=features,
+ kernel_size=kernel_size,
+ padding=1,
+ groups=1,
+ bias_attr=False),
+ nn.ReLU())
self.conv17 = nn.Sequential(
- nn.Conv2D(in_channels=features,
- out_channels=features,
- kernel_size=kernel_size1,
- padding=0,
- groups=groups,
- bias_attr=False))
+ nn.Conv2D(
+ in_channels=features,
+ out_channels=features,
+ kernel_size=kernel_size1,
+ padding=0,
+ groups=groups,
+ bias_attr=False))
self.conv17_1 = nn.Sequential(
- nn.Conv2D(in_channels=features,
- out_channels=features,
- kernel_size=kernel_size,
- padding=1,
- groups=1,
- bias_attr=False), nn.ReLU())
+ nn.Conv2D(
+ in_channels=features,
+ out_channels=features,
+ kernel_size=kernel_size,
+ padding=1,
+ groups=1,
+ bias_attr=False),
+ nn.ReLU())
self.conv17_2 = nn.Sequential(
- nn.Conv2D(in_channels=features,
- out_channels=features,
- kernel_size=kernel_size,
- padding=1,
- groups=1,
- bias_attr=False), nn.ReLU())
+ nn.Conv2D(
+ in_channels=features,
+ out_channels=features,
+ kernel_size=kernel_size,
+ padding=1,
+ groups=1,
+ bias_attr=False),
+ nn.ReLU())
self.conv17_3 = nn.Sequential(
- nn.Conv2D(in_channels=features,
- out_channels=features,
- kernel_size=kernel_size,
- padding=1,
- groups=1,
- bias_attr=False), nn.ReLU())
+ nn.Conv2D(
+ in_channels=features,
+ out_channels=features,
+ kernel_size=kernel_size,
+ padding=1,
+ groups=1,
+ bias_attr=False),
+ nn.ReLU())
self.conv17_4 = nn.Sequential(
- nn.Conv2D(in_channels=features,
- out_channels=features,
- kernel_size=kernel_size,
- padding=1,
- groups=1,
- bias_attr=False), nn.ReLU())
+ nn.Conv2D(
+ in_channels=features,
+ out_channels=features,
+ kernel_size=kernel_size,
+ padding=1,
+ groups=1,
+ bias_attr=False),
+ nn.ReLU())
self.conv18 = nn.Sequential(
- nn.Conv2D(in_channels=features,
- out_channels=3,
- kernel_size=kernel_size,
- padding=padding,
- groups=groups,
- bias_attr=False))
+ nn.Conv2D(
+ in_channels=features,
+ out_channels=3,
+ kernel_size=kernel_size,
+ padding=padding,
+ groups=groups,
+ bias_attr=False))
self.ReLU = nn.ReLU()
- self.upsample = UpsampleBlock(64,
- scale=scale,
- multi_scale=multi_scale,
- group=1)
+ self.upsample = UpsampleBlock(
+ 64, scale=scale, multi_scale=multi_scale, group=1)
def forward(self, x, scale=None):
if scale is None:
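
The upsample hunks above use the standard sub-pixel recipe: a 3x3 conv expands channels by r^2, then PixelShuffle(r) rearranges them into an r-times larger feature map, repeated log2(scale) times for scales 2, 4, 8. Checking the shape arithmetic for one x2 stage (base Paddle only):

import paddle
import paddle.nn as nn

up2 = nn.Sequential(
    nn.Conv2D(64, 4 * 64, 3, padding=1),  # C -> (2^2) * C
    nn.PixelShuffle(2),                   # (4C, H, W) -> (C, 2H, 2W)
)
x = paddle.randn([1, 64, 24, 24])
print(up2(x).shape)  # [1, 64, 48, 48]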
diff --git a/paddlers/models/ppgan/models/generators/mobile_resnet.py b/paddlers/models/ppgan/models/generators/mobile_resnet.py
index 6b178ab1..4c0dc9de 100644
--- a/paddlers/models/ppgan/models/generators/mobile_resnet.py
+++ b/paddlers/models/ppgan/models/generators/mobile_resnet.py
@@ -37,54 +37,52 @@ def __init__(self,
use_bias = norm_layer == nn.InstanceNorm2D
self.model = nn.LayerList([
- nn.ReflectionPad2d([3, 3, 3, 3]),
- nn.Conv2D(input_channel,
- int(ngf),
- kernel_size=7,
- padding=0,
- bias_attr=use_bias),
- norm_layer(ngf),
- nn.ReLU()
+ nn.ReflectionPad2d([3, 3, 3, 3]), nn.Conv2D(
+ input_channel,
+ int(ngf),
+ kernel_size=7,
+ padding=0,
+ bias_attr=use_bias), norm_layer(ngf), nn.ReLU()
])
n_downsampling = 2
for i in range(n_downsampling):
mult = 2**i
self.model.extend([
- nn.Conv2D(ngf * mult,
- ngf * mult * 2,
- kernel_size=3,
- stride=2,
- padding=1,
- bias_attr=use_bias),
- norm_layer(ngf * mult * 2),
- nn.ReLU()
+ nn.Conv2D(
+ ngf * mult,
+ ngf * mult * 2,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias_attr=use_bias), norm_layer(ngf * mult * 2), nn.ReLU()
])
mult = 2**n_downsampling
for i in range(n_blocks):
self.model.extend([
- MobileResnetBlock(ngf * mult,
- ngf * mult,
- padding_type=padding_type,
- norm_layer=norm_layer,
- use_dropout=use_dropout,
- use_bias=use_bias)
+ MobileResnetBlock(
+ ngf * mult,
+ ngf * mult,
+ padding_type=padding_type,
+ norm_layer=norm_layer,
+ use_dropout=use_dropout,
+ use_bias=use_bias)
])
for i in range(n_downsampling):
mult = 2**(n_downsampling - i)
output_size = (i + 1) * 128
self.model.extend([
- nn.Conv2DTranspose(ngf * mult,
- int(ngf * mult / 2),
- kernel_size=3,
- stride=2,
- padding=1,
- output_padding=1,
- bias_attr=use_bias),
- norm_layer(int(ngf * mult / 2)),
+ nn.Conv2DTranspose(
+ ngf * mult,
+ int(ngf * mult / 2),
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ output_padding=1,
+ bias_attr=use_bias), norm_layer(int(ngf * mult / 2)),
nn.ReLU()
])
@@ -119,13 +117,12 @@ def __init__(self, in_c, out_c, padding_type, norm_layer, use_dropout,
self.padding_type)
self.conv_block.extend([
- SeparableConv2D(num_channels=in_c,
- num_filters=out_c,
- filter_size=3,
- padding=p,
- stride=1),
- norm_layer(out_c),
- nn.ReLU()
+ SeparableConv2D(
+ num_channels=in_c,
+ num_filters=out_c,
+ filter_size=3,
+ padding=p,
+ stride=1), norm_layer(out_c), nn.ReLU()
])
self.conv_block.extend([nn.Dropout(0.5)])
@@ -141,12 +138,12 @@ def __init__(self, in_c, out_c, padding_type, norm_layer, use_dropout,
self.padding_type)
self.conv_block.extend([
- SeparableConv2D(num_channels=out_c,
- num_filters=in_c,
- filter_size=3,
- padding=p,
- stride=1),
- norm_layer(in_c)
+ SeparableConv2D(
+ num_channels=out_c,
+ num_filters=in_c,
+ filter_size=3,
+ padding=p,
+ stride=1), norm_layer(in_c)
])
def forward(self, inputs):
@@ -178,8 +175,8 @@ def __init__(self,
stride=stride,
padding=padding,
groups=num_channels,
- weight_attr=paddle.ParamAttr(
- initializer=nn.initializer.Normal(loc=0.0, scale=stddev)),
+ weight_attr=paddle.ParamAttr(initializer=nn.initializer.Normal(
+ loc=0.0, scale=stddev)),
bias_attr=use_bias)
])
@@ -191,8 +188,8 @@ def __init__(self,
out_channels=num_filters,
kernel_size=1,
stride=1,
- weight_attr=paddle.ParamAttr(
- initializer=nn.initializer.Normal(loc=0.0, scale=stddev)),
+ weight_attr=paddle.ParamAttr(initializer=nn.initializer.Normal(
+ loc=0.0, scale=stddev)),
bias_attr=use_bias)
])
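
The SeparableConv2D hunks above follow the depthwise-separable factorization: a per-channel spatial conv (groups equal to the channel count) followed by a 1x1 pointwise conv that mixes channels. A minimal shape check (a sketch with illustrative sizes; the repo's version also sets custom Normal weight initialization):

import paddle
import paddle.nn as nn

sep = nn.Sequential(
    nn.Conv2D(32, 32, 3, padding=1, groups=32),  # depthwise: one filter per channel
    nn.Conv2D(32, 64, 1),                        # pointwise: mix channels
)
x = paddle.randn([1, 32, 16, 16])
print(sep(x).shape)  # [1, 64, 16, 16]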
diff --git a/paddlers/models/ppgan/models/generators/mpr.py b/paddlers/models/ppgan/models/generators/mpr.py
index 9be802a1..aed93cb4 100644
--- a/paddlers/models/ppgan/models/generators/mpr.py
+++ b/paddlers/models/ppgan/models/generators/mpr.py
@@ -14,12 +14,13 @@
def conv(in_channels, out_channels, kernel_size, bias_attr=False, stride=1):
- return nn.Conv2D(in_channels,
- out_channels,
- kernel_size,
- padding=(kernel_size // 2),
- bias_attr=bias_attr,
- stride=stride)
+ return nn.Conv2D(
+ in_channels,
+ out_channels,
+ kernel_size,
+ padding=(kernel_size // 2),
+ bias_attr=bias_attr,
+ stride=stride)
## Channel Attention Layer
@@ -30,16 +31,20 @@ def __init__(self, channel, reduction=16, bias_attr=False):
self.avg_pool = nn.AdaptiveAvgPool2D(1)
# feature channel downscale and upscale --> channel weight
self.conv_du = nn.Sequential(
- nn.Conv2D(channel,
- channel // reduction,
- 1,
- padding=0,
- bias_attr=bias_attr), nn.ReLU(),
- nn.Conv2D(channel // reduction,
- channel,
- 1,
- padding=0,
- bias_attr=bias_attr), nn.Sigmoid())
+ nn.Conv2D(
+ channel,
+ channel // reduction,
+ 1,
+ padding=0,
+ bias_attr=bias_attr),
+ nn.ReLU(),
+ nn.Conv2D(
+ channel // reduction,
+ channel,
+ 1,
+ padding=0,
+ bias_attr=bias_attr),
+ nn.Sigmoid())
def forward(self, x):
y = self.avg_pool(x)
@@ -53,10 +58,12 @@ def __init__(self, n_feat, kernel_size, reduction, bias_attr, act):
super(CAB, self).__init__()
modules_body = []
modules_body.append(
- conv(n_feat, n_feat, kernel_size, bias_attr=bias_attr))
+ conv(
+ n_feat, n_feat, kernel_size, bias_attr=bias_attr))
modules_body.append(act)
modules_body.append(
- conv(n_feat, n_feat, kernel_size, bias_attr=bias_attr))
+ conv(
+ n_feat, n_feat, kernel_size, bias_attr=bias_attr))
self.CA = CALayer(n_feat, reduction, bias_attr=bias_attr)
self.body = nn.Sequential(*modules_body)
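
The CALayer hunks above are squeeze-and-excitation style channel attention: global average pooling squeezes each channel to a scalar, two 1x1 convs form a reduction bottleneck, and the sigmoid output gates the input channels. A compact sketch (illustrative names, base Paddle only):

import paddle
import paddle.nn as nn

class TinyCALayer(nn.Layer):
    def __init__(self, channel=64, reduction=16):
        super().__init__()
        self.pool = nn.AdaptiveAvgPool2D(1)
        self.gate = nn.Sequential(
            nn.Conv2D(channel, channel // reduction, 1),
            nn.ReLU(),
            nn.Conv2D(channel // reduction, channel, 1),
            nn.Sigmoid(), )

    def forward(self, x):
        # per-channel gates in (0, 1), broadcast over H and W
        return x * self.gate(self.pool(x))

x = paddle.randn([1, 64, 32, 32])
print(TinyCALayer()(x).shape)  # [1, 64, 32, 32]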
@@ -73,13 +80,15 @@ class DownSample(nn.Layer):
def __init__(self, in_channels, s_factor):
super(DownSample, self).__init__()
self.down = nn.Sequential(
- nn.Upsample(scale_factor=0.5, mode='bilinear', align_corners=False),
- nn.Conv2D(in_channels,
- in_channels + s_factor,
- 1,
- stride=1,
- padding=0,
- bias_attr=False))
+ nn.Upsample(
+ scale_factor=0.5, mode='bilinear', align_corners=False),
+ nn.Conv2D(
+ in_channels,
+ in_channels + s_factor,
+ 1,
+ stride=1,
+ padding=0,
+ bias_attr=False))
def forward(self, x):
x = self.down(x)
@@ -90,13 +99,15 @@ class UpSample(nn.Layer):
def __init__(self, in_channels, s_factor):
super(UpSample, self).__init__()
self.up = nn.Sequential(
- nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
- nn.Conv2D(in_channels + s_factor,
- in_channels,
- 1,
- stride=1,
- padding=0,
- bias_attr=False))
+ nn.Upsample(
+ scale_factor=2, mode='bilinear', align_corners=False),
+ nn.Conv2D(
+ in_channels + s_factor,
+ in_channels,
+ 1,
+ stride=1,
+ padding=0,
+ bias_attr=False))
def forward(self, x):
x = self.up(x)
@@ -107,13 +118,15 @@ class SkipUpSample(nn.Layer):
def __init__(self, in_channels, s_factor):
super(SkipUpSample, self).__init__()
self.up = nn.Sequential(
- nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False),
- nn.Conv2D(in_channels + s_factor,
- in_channels,
- 1,
- stride=1,
- padding=0,
- bias_attr=False))
+ nn.Upsample(
+ scale_factor=2, mode='bilinear', align_corners=False),
+ nn.Conv2D(
+ in_channels + s_factor,
+ in_channels,
+ 1,
+ stride=1,
+ padding=0,
+ bias_attr=False))
def forward(self, x, y):
x = self.up(x)
@@ -156,31 +169,31 @@ def __init__(self, n_feat, kernel_size, reduction, act, bias_attr,
# Cross Stage Feature Fusion (CSFF)
if csff:
- self.csff_enc1 = nn.Conv2D(n_feat,
- n_feat,
- kernel_size=1,
- bias_attr=bias_attr)
- self.csff_enc2 = nn.Conv2D(n_feat + scale_unetfeats,
- n_feat + scale_unetfeats,
- kernel_size=1,
- bias_attr=bias_attr)
- self.csff_enc3 = nn.Conv2D(n_feat + (scale_unetfeats * 2),
- n_feat + (scale_unetfeats * 2),
- kernel_size=1,
- bias_attr=bias_attr)
-
- self.csff_dec1 = nn.Conv2D(n_feat,
- n_feat,
- kernel_size=1,
- bias_attr=bias_attr)
- self.csff_dec2 = nn.Conv2D(n_feat + scale_unetfeats,
- n_feat + scale_unetfeats,
- kernel_size=1,
- bias_attr=bias_attr)
- self.csff_dec3 = nn.Conv2D(n_feat + (scale_unetfeats * 2),
- n_feat + (scale_unetfeats * 2),
- kernel_size=1,
- bias_attr=bias_attr)
+ self.csff_enc1 = nn.Conv2D(
+ n_feat, n_feat, kernel_size=1, bias_attr=bias_attr)
+ self.csff_enc2 = nn.Conv2D(
+ n_feat + scale_unetfeats,
+ n_feat + scale_unetfeats,
+ kernel_size=1,
+ bias_attr=bias_attr)
+ self.csff_enc3 = nn.Conv2D(
+ n_feat + (scale_unetfeats * 2),
+ n_feat + (scale_unetfeats * 2),
+ kernel_size=1,
+ bias_attr=bias_attr)
+
+ self.csff_dec1 = nn.Conv2D(
+ n_feat, n_feat, kernel_size=1, bias_attr=bias_attr)
+ self.csff_dec2 = nn.Conv2D(
+ n_feat + scale_unetfeats,
+ n_feat + scale_unetfeats,
+ kernel_size=1,
+ bias_attr=bias_attr)
+ self.csff_dec3 = nn.Conv2D(
+ n_feat + (scale_unetfeats * 2),
+ n_feat + (scale_unetfeats * 2),
+ kernel_size=1,
+ bias_attr=bias_attr)
def forward(self, x, encoder_outs=None, decoder_outs=None):
enc1 = self.encoder_level1(x)
@@ -300,36 +313,42 @@ def __init__(self, n_feat, scale_orsnetfeats, kernel_size, reduction, act,
UpSample(n_feat + scale_unetfeats, scale_unetfeats),
UpSample(n_feat, scale_unetfeats))
- self.conv_enc1 = nn.Conv2D(n_feat,
- n_feat + scale_orsnetfeats,
- kernel_size=1,
- bias_attr=bias_attr)
- self.conv_enc2 = nn.Conv2D(n_feat,
- n_feat + scale_orsnetfeats,
- kernel_size=1,
- bias_attr=bias_attr)
- self.conv_enc3 = nn.Conv2D(n_feat,
- n_feat + scale_orsnetfeats,
- kernel_size=1,
- bias_attr=bias_attr)
-
- self.conv_dec1 = nn.Conv2D(n_feat,
- n_feat + scale_orsnetfeats,
- kernel_size=1,
- bias_attr=bias_attr)
- self.conv_dec2 = nn.Conv2D(n_feat,
- n_feat + scale_orsnetfeats,
- kernel_size=1,
- bias_attr=bias_attr)
- self.conv_dec3 = nn.Conv2D(n_feat,
- n_feat + scale_orsnetfeats,
- kernel_size=1,
- bias_attr=bias_attr)
+ self.conv_enc1 = nn.Conv2D(
+ n_feat,
+ n_feat + scale_orsnetfeats,
+ kernel_size=1,
+ bias_attr=bias_attr)
+ self.conv_enc2 = nn.Conv2D(
+ n_feat,
+ n_feat + scale_orsnetfeats,
+ kernel_size=1,
+ bias_attr=bias_attr)
+ self.conv_enc3 = nn.Conv2D(
+ n_feat,
+ n_feat + scale_orsnetfeats,
+ kernel_size=1,
+ bias_attr=bias_attr)
+
+ self.conv_dec1 = nn.Conv2D(
+ n_feat,
+ n_feat + scale_orsnetfeats,
+ kernel_size=1,
+ bias_attr=bias_attr)
+ self.conv_dec2 = nn.Conv2D(
+ n_feat,
+ n_feat + scale_orsnetfeats,
+ kernel_size=1,
+ bias_attr=bias_attr)
+ self.conv_dec3 = nn.Conv2D(
+ n_feat,
+ n_feat + scale_orsnetfeats,
+ kernel_size=1,
+ bias_attr=bias_attr)
def forward(self, x, encoder_outs, decoder_outs):
x = self.orb1(x)
- x = x + self.conv_enc1(encoder_outs[0]) + self.conv_dec1(
- decoder_outs[0])
+ x = x + self.conv_enc1(encoder_outs[0]) + self.conv_dec1(decoder_outs[
+ 0])
x = self.orb2(x)
x = x + self.conv_enc2(self.up_enc1(encoder_outs[1])) + self.conv_dec2(
@@ -374,33 +393,38 @@ def __init__(self,
super(MPRNet, self).__init__()
act = nn.PReLU()
self.shallow_feat1 = nn.Sequential(
- conv(in_c, n_feat, kernel_size, bias_attr=bias_attr),
+ conv(
+ in_c, n_feat, kernel_size, bias_attr=bias_attr),
CAB(n_feat, kernel_size, reduction, bias_attr=bias_attr, act=act))
self.shallow_feat2 = nn.Sequential(
- conv(in_c, n_feat, kernel_size, bias_attr=bias_attr),
+ conv(
+ in_c, n_feat, kernel_size, bias_attr=bias_attr),
CAB(n_feat, kernel_size, reduction, bias_attr=bias_attr, act=act))
self.shallow_feat3 = nn.Sequential(
- conv(in_c, n_feat, kernel_size, bias_attr=bias_attr),
+ conv(
+ in_c, n_feat, kernel_size, bias_attr=bias_attr),
CAB(n_feat, kernel_size, reduction, bias_attr=bias_attr, act=act))
# Cross Stage Feature Fusion (CSFF)
- self.stage1_encoder = Encoder(n_feat,
- kernel_size,
- reduction,
- act,
- bias_attr,
- scale_unetfeats,
- csff=False)
+ self.stage1_encoder = Encoder(
+ n_feat,
+ kernel_size,
+ reduction,
+ act,
+ bias_attr,
+ scale_unetfeats,
+ csff=False)
self.stage1_decoder = Decoder(n_feat, kernel_size, reduction, act,
bias_attr, scale_unetfeats)
- self.stage2_encoder = Encoder(n_feat,
- kernel_size,
- reduction,
- act,
- bias_attr,
- scale_unetfeats,
- csff=True)
+ self.stage2_encoder = Encoder(
+ n_feat,
+ kernel_size,
+ reduction,
+ act,
+ bias_attr,
+ scale_unetfeats,
+ csff=True)
self.stage2_decoder = Decoder(n_feat, kernel_size, reduction, act,
bias_attr, scale_unetfeats)
@@ -411,18 +435,15 @@ def __init__(self,
self.sam12 = SAM(n_feat, kernel_size=1, bias_attr=bias_attr)
self.sam23 = SAM(n_feat, kernel_size=1, bias_attr=bias_attr)
- self.concat12 = conv(n_feat * 2,
- n_feat,
- kernel_size,
- bias_attr=bias_attr)
- self.concat23 = conv(n_feat * 2,
- n_feat + scale_orsnetfeats,
- kernel_size,
- bias_attr=bias_attr)
- self.tail = conv(n_feat + scale_orsnetfeats,
- out_c,
- kernel_size,
- bias_attr=bias_attr)
+ self.concat12 = conv(
+ n_feat * 2, n_feat, kernel_size, bias_attr=bias_attr)
+ self.concat23 = conv(
+ n_feat * 2,
+ n_feat + scale_orsnetfeats,
+ kernel_size,
+ bias_attr=bias_attr)
+ self.tail = conv(
+ n_feat + scale_orsnetfeats, out_c, kernel_size, bias_attr=bias_attr)
def forward(self, x3_img):
# Original-resolution Image for Stage 3
diff --git a/paddlers/models/ppgan/models/generators/msvsr.py b/paddlers/models/ppgan/models/generators/msvsr.py
index 79e841cf..512c55d3 100644
--- a/paddlers/models/ppgan/models/generators/msvsr.py
+++ b/paddlers/models/ppgan/models/generators/msvsr.py
@@ -59,6 +59,7 @@ class MSVSR(nn.Layer):
        use_local_connnect (bool): Whether to add the feature of stage1 after upsampling.
Default: True.
"""
+
def __init__(self,
mid_channels=32,
num_init_blocks=2,
@@ -129,34 +130,30 @@ def __init__(self,
(3 + i) * mid_channels, mid_channels, num_blocks)
# stage1
- self.stage1_align = AlignmentModule(mid_channels,
- mid_channels,
- 3,
- padding=1,
- deformable_groups=stage1_groups)
- self.stage1_blocks = ResidualBlocksWithInputConv(
- 3 * mid_channels, mid_channels, 3)
+ self.stage1_align = AlignmentModule(
+ mid_channels,
+ mid_channels,
+ 3,
+ padding=1,
+ deformable_groups=stage1_groups)
+ self.stage1_blocks = ResidualBlocksWithInputConv(3 * mid_channels,
+ mid_channels, 3)
# upsampling module
self.reconstruction = ResidualBlocksWithInputConv(
6 * mid_channels, mid_channels, num_reconstruction_blocks)
- self.upsample1 = PixelShufflePack(mid_channels,
- mid_channels,
- 2,
- upsample_kernel=3)
- self.upsample2 = PixelShufflePack(mid_channels,
- mid_channels,
- 2,
- upsample_kernel=3)
+ self.upsample1 = PixelShufflePack(
+ mid_channels, mid_channels, 2, upsample_kernel=3)
+ self.upsample2 = PixelShufflePack(
+ mid_channels, mid_channels, 2, upsample_kernel=3)
if self.only_last:
self.conv_last = nn.Conv2D(mid_channels, 3, 3, 1, 1)
else:
self.conv_hr = nn.Conv2D(mid_channels, mid_channels, 3, 1, 1)
self.conv_last = nn.Conv2D(mid_channels, 3, 3, 1, 1)
- self.img_upsample = nn.Upsample(scale_factor=4,
- mode='bilinear',
- align_corners=False)
+ self.img_upsample = nn.Upsample(
+ scale_factor=4, mode='bilinear', align_corners=False)
# activation function
self.lrelu = nn.LeakyReLU(negative_slope=0.1)
@@ -179,14 +176,10 @@ def __init__(self,
self.aux_conv_last = nn.Conv2D(mid_channels, 3, 3, 1, 1)
- self.aux_upsample1 = PixelShufflePack(mid_channels,
- mid_channels,
- 2,
- upsample_kernel=3)
- self.aux_upsample2 = PixelShufflePack(mid_channels,
- mid_channels,
- 2,
- upsample_kernel=3)
+ self.aux_upsample1 = PixelShufflePack(
+ mid_channels, mid_channels, 2, upsample_kernel=3)
+ self.aux_upsample2 = PixelShufflePack(
+ mid_channels, mid_channels, 2, upsample_kernel=3)
self.hybrid_conv_last = nn.Conv2D(mid_channels, 3, 3, 1, 1)
def check_if_mirror_extended(self, lrs):
@@ -267,8 +260,8 @@ def stage1(self, feats, flows, flows_forward=None):
if i < t:
feat_back = feats['spatial'][mapping_idx[idx - 1]]
flow_n1_ = flows_forward[:, flow_idx[i] - 1, :, :, :]
- cond_n1_ = flow_warp(feat_back, flow_n1_.transpose([0, 2, 3,
- 1]))
+ cond_n1_ = flow_warp(feat_back,
+ flow_n1_.transpose([0, 2, 3, 1]))
cond_ = paddle.concat([cond_n1_, feat_current], axis=1)
feat_back, _, _ = self.stage1_align(feat_back, cond_, flow_n1_)
else:
@@ -339,8 +332,8 @@ def stage2(self, feats, flows):
# concatenate and residual blocks
feat = [feat_current] + [
- feats[k][idx]
- for k in feats if k not in ['spatial', prop_name]
+ feats[k][idx] for k in feats
+ if k not in ['spatial', prop_name]
] + [feat_prop]
feat = paddle.concat(feat, axis=1)
@@ -412,8 +405,8 @@ def stage3(self,
# concatenate and residual blocks
feat = [feat_current] + [
- feats[k][idx]
- for k in feats if k not in ['spatial', prop_name]
+ feats[k][idx] for k in feats
+ if k not in ['spatial', prop_name]
] + [feat_prop]
feat = paddle.concat(feat, axis=1)
@@ -460,12 +453,15 @@ def auxiliary_stage(self, feats, lqs):
# output tensor of auxiliary_stage with shape (n, 3, 4*h, 4*w)
aux_feats['outs'].append(hr)
- aux_feat = self.aux_block_down1(paddle.concat([hr, hr_high],
- axis=1))
+ aux_feat = self.aux_block_down1(
+ paddle.concat(
+ [hr, hr_high], axis=1))
aux_feat = self.aux_block_down2(
- paddle.concat([aux_feat, hr_mid], axis=1))
- aux_feat = self.aux_fusion(paddle.concat([aux_feat, hr_low],
- axis=1))
+ paddle.concat(
+ [aux_feat, hr_mid], axis=1))
+ aux_feat = self.aux_fusion(
+ paddle.concat(
+ [aux_feat, hr_low], axis=1))
# out feature of auxiliary_stage with shape (n, c, h, w)
aux_feats['feats'].append(aux_feat)
@@ -520,8 +516,9 @@ def upsample(self, lqs, feats, aux_feats=None):
outputs.append(hr)
if self.auxiliary_loss:
- return paddle.stack(aux_feats['outs'],
- axis=1), paddle.stack(outputs, axis=1)
+ return paddle.stack(
+ aux_feats['outs'], axis=1), paddle.stack(
+ outputs, axis=1)
return paddle.stack(outputs, axis=1)
def forward(self, lqs):
@@ -547,9 +544,10 @@ def forward(self, lqs):
feats['spatial'] = [feats_[:, i, :, :, :] for i in range(0, t)]
# compute optical flow using the low-res inputs
- assert lqs_downsample.shape[3] >= 64 and lqs_downsample.shape[4] >= 64, (
- 'The height and width of low-res inputs must be at least 64, '
- f'but got {h} and {w}.')
+ assert lqs_downsample.shape[3] >= 64 and lqs_downsample.shape[
+ 4] >= 64, (
+ 'The height and width of low-res inputs must be at least 64, '
+ f'but got {h} and {w}.')
flows_forward, flows_backward = self.compute_flow(lqs_downsample)
@@ -581,6 +579,7 @@ class AlignmentModule(nn.Layer):
groups (int): Same as nn.Conv2d.
deformable_groups (int): Number of deformable_groups in DeformConv2D.
"""
+
def __init__(self,
in_channels=128,
out_channels=64,
@@ -599,15 +598,15 @@ def __init__(self,
nn.LeakyReLU(negative_slope=0.1),
nn.Conv2D(out_channels, out_channels, 3, 1, 1),
nn.LeakyReLU(negative_slope=0.1),
- nn.Conv2D(out_channels, 27 * deformable_groups, 3, 1, 1),
- )
- self.dcn = DeformConv2D(in_channels,
- out_channels,
- kernel_size=kernel_size,
- stride=stride,
- padding=padding,
- dilation=dilation,
- deformable_groups=deformable_groups)
+ nn.Conv2D(out_channels, 27 * deformable_groups, 3, 1, 1), )
+ self.dcn = DeformConv2D(
+ in_channels,
+ out_channels,
+ kernel_size=kernel_size,
+ stride=stride,
+ padding=padding,
+ dilation=dilation,
+ deformable_groups=deformable_groups)
self.init_offset()
@@ -642,6 +641,7 @@ class ReAlignmentModule(nn.Layer):
groups (int): Same as nn.Conv2d.
deformable_groups (int): Number of deformable_groups in DeformConv2D.
"""
+
def __init__(self,
in_channels=128,
out_channels=64,
@@ -653,13 +653,14 @@ def __init__(self,
deformable_groups=16):
super(ReAlignmentModule, self).__init__()
- self.mdconv = DeformConv2D(in_channels,
- out_channels,
- kernel_size=kernel_size,
- stride=stride,
- padding=padding,
- dilation=dilation,
- deformable_groups=deformable_groups)
+ self.mdconv = DeformConv2D(
+ in_channels,
+ out_channels,
+ kernel_size=kernel_size,
+ stride=stride,
+ padding=padding,
+ dilation=dilation,
+ deformable_groups=deformable_groups)
self.conv_offset = nn.Sequential(
nn.Conv2D(2 * out_channels + 2, out_channels, 3, 1, 1),
nn.LeakyReLU(negative_slope=0.1),
@@ -667,15 +668,15 @@ def __init__(self,
nn.LeakyReLU(negative_slope=0.1),
nn.Conv2D(out_channels, out_channels, 3, 1, 1),
nn.LeakyReLU(negative_slope=0.1),
- nn.Conv2D(out_channels, 27 * deformable_groups, 3, 1, 1),
- )
- self.dcn = DeformConv2D(in_channels,
- out_channels,
- kernel_size=kernel_size,
- stride=stride,
- padding=padding,
- dilation=dilation,
- deformable_groups=deformable_groups)
+ nn.Conv2D(out_channels, 27 * deformable_groups, 3, 1, 1), )
+ self.dcn = DeformConv2D(
+ in_channels,
+ out_channels,
+ kernel_size=kernel_size,
+ stride=stride,
+ padding=padding,
+ dilation=dilation,
+ deformable_groups=deformable_groups)
self.init_offset()
@@ -734,6 +735,7 @@ class ModifiedSPyNet(nn.Layer):
        use_tiny_block (bool): Whether to use the tiny SPyNet.
Default: True.
"""
+
def __init__(self,
act_cfg=dict(name='LeakyReLU'),
num_blocks=6,
@@ -741,7 +743,8 @@ def __init__(self,
super().__init__()
self.num_blocks = num_blocks
self.basic_module = nn.LayerList([
- SPyNetBlock(act_cfg=act_cfg, use_tiny_block=use_tiny_block)
+ SPyNetBlock(
+ act_cfg=act_cfg, use_tiny_block=use_tiny_block)
for _ in range(num_blocks)
])
@@ -781,8 +784,8 @@ def compute_flow(self, ref, supp):
# flow computation
flow = paddle.to_tensor(
np.zeros([
- n, 2, h // (2**(self.num_blocks - 1)), w //
- (2**(self.num_blocks - 1))
+ n, 2, h // (2**(self.num_blocks - 1)), w // (2**(self.num_blocks
+ - 1))
], 'float32'))
for level in range(len(ref)):
@@ -794,13 +797,14 @@ def compute_flow(self, ref, supp):
align_corners=True) * 2.0
# add the residue to the upsampled flow
- flow = flow_up + self.basic_module[level](paddle.concat([
- ref[level],
- flow_warp(supp[level],
- flow_up.transpose([0, 2, 3, 1]),
- padding_mode='border'), flow_up
- ],
- axis=1))
+ flow = flow_up + self.basic_module[level](paddle.concat(
+ [
+ ref[level], flow_warp(
+ supp[level],
+ flow_up.transpose([0, 2, 3, 1]),
+ padding_mode='border'), flow_up
+ ],
+ axis=1))
return flow
@@ -822,8 +826,8 @@ def compute_flow_list(self, ref, supp):
flow_list = []
flow = paddle.to_tensor(
np.zeros([
- n, 2, h // (2**(self.num_blocks - 1)), w //
- (2**(self.num_blocks - 1))
+ n, 2, h // (2**(self.num_blocks - 1)), w // (2**(self.num_blocks
+ - 1))
], 'float32'))
for level in range(len(ref)):
if level == 0:
@@ -834,13 +838,14 @@ def compute_flow_list(self, ref, supp):
align_corners=True) * 2.0
# add the residue to the upsampled flow
- flow = flow_up + self.basic_module[level](paddle.concat([
- ref[level],
- flow_warp(supp[level],
- flow_up.transpose([0, 2, 3, 1]),
- padding_mode='border'), flow_up
- ],
- axis=1))
+ flow = flow_up + self.basic_module[level](paddle.concat(
+ [
+ ref[level], flow_warp(
+ supp[level],
+ flow_up.transpose([0, 2, 3, 1]),
+ padding_mode='border'), flow_up
+ ],
+ axis=1))
flow_list.append(flow)
return flow_list
@@ -861,24 +866,21 @@ def forward(self, ref, supp):
h, w = ref.shape[2:4]
w_up = w if (w % 32) == 0 else 32 * (w // 32 + 1)
h_up = h if (h % 32) == 0 else 32 * (h // 32 + 1)
- ref = F.interpolate(ref,
- size=(h_up, w_up),
- mode='bilinear',
- align_corners=False)
+ ref = F.interpolate(
+ ref, size=(h_up, w_up), mode='bilinear', align_corners=False)
- supp = F.interpolate(supp,
- size=(h_up, w_up),
- mode='bilinear',
- align_corners=False)
+ supp = F.interpolate(
+ supp, size=(h_up, w_up), mode='bilinear', align_corners=False)
ref.stop_gradient = False
supp.stop_gradient = False
# compute flow, and resize back to the original resolution
- flow = F.interpolate(self.compute_flow(ref, supp),
- size=(h, w),
- mode='bilinear',
- align_corners=False)
+ flow = F.interpolate(
+ self.compute_flow(ref, supp),
+ size=(h, w),
+ mode='bilinear',
+ align_corners=False)
# adjust the flow values
flow[:, 0, :, :] *= float(w) / float(w_up)
@@ -891,174 +893,202 @@ class SPyNetBlock(nn.Layer):
"""Basic Block of Modified SPyNet.
    Refer to Optical Flow Estimation using a Spatial Pyramid Network, CVPR, 2017.
"""
+
def __init__(self, act_cfg=dict(name='LeakyReLU'), use_tiny_block=False):
super().__init__()
if use_tiny_block:
self.basic_module = nn.Sequential(
- ConvLayer(in_channels=8,
- out_channels=16,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=16,
- out_channels=16,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=16,
- out_channels=32,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=32,
- out_channels=32,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=32,
- out_channels=32,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=32,
- out_channels=32,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=32,
- out_channels=16,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=16,
- out_channels=16,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=16,
- out_channels=16,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=16,
- out_channels=8,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=8,
- out_channels=8,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=8,
- out_channels=2,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=None))
+ ConvLayer(
+ in_channels=8,
+ out_channels=16,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=16,
+ out_channels=16,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=16,
+ out_channels=32,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=32,
+ out_channels=32,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=32,
+ out_channels=32,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=32,
+ out_channels=32,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=32,
+ out_channels=16,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=16,
+ out_channels=16,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=16,
+ out_channels=16,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=16,
+ out_channels=8,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=8,
+ out_channels=8,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=8,
+ out_channels=2,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=None))
else:
self.basic_module = nn.Sequential(
- ConvLayer(in_channels=8,
- out_channels=16,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=16,
- out_channels=16,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=16,
- out_channels=32,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=32,
- out_channels=32,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=32,
- out_channels=32,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=32,
- out_channels=64,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=64,
- out_channels=32,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=32,
- out_channels=32,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=32,
- out_channels=32,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=32,
- out_channels=16,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=16,
- out_channels=16,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=16,
- out_channels=16,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=16,
- out_channels=16,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=16,
- out_channels=16,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=act_cfg),
- ConvLayer(in_channels=16,
- out_channels=2,
- kernel_size=3,
- stride=1,
- padding=1,
- act_cfg=None))
+ ConvLayer(
+ in_channels=8,
+ out_channels=16,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=16,
+ out_channels=16,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=16,
+ out_channels=32,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=32,
+ out_channels=32,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=32,
+ out_channels=32,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=32,
+ out_channels=64,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=64,
+ out_channels=32,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=32,
+ out_channels=32,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=32,
+ out_channels=32,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=32,
+ out_channels=16,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=16,
+ out_channels=16,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=16,
+ out_channels=16,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=16,
+ out_channels=16,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=16,
+ out_channels=16,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=act_cfg),
+ ConvLayer(
+ in_channels=16,
+ out_channels=2,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ act_cfg=None))
def forward(self, tensor_input):
"""Forward function of SPyNetBlock.
@@ -1087,13 +1117,14 @@ def __init__(self,
self.act_cfg = act_cfg
self.with_activation = act_cfg is not None
- self.conv = nn.Conv2D(in_channels=in_channels,
- out_channels=out_channels,
- kernel_size=kernel_size,
- stride=stride,
- padding=padding,
- dilation=dilation,
- groups=groups)
+ self.conv = nn.Conv2D(
+ in_channels=in_channels,
+ out_channels=out_channels,
+ kernel_size=kernel_size,
+ stride=stride,
+ padding=padding,
+ dilation=dilation,
+ groups=groups)
if self.with_activation:
if act_cfg['name'] == 'ReLU':
diff --git a/paddlers/models/ppgan/models/generators/occlusion_aware.py b/paddlers/models/ppgan/models/generators/occlusion_aware.py
index 7558caca..6e31102b 100644
--- a/paddlers/models/ppgan/models/generators/occlusion_aware.py
+++ b/paddlers/models/ppgan/models/generators/occlusion_aware.py
@@ -17,6 +17,7 @@ class OcclusionAwareGenerator(nn.Layer):
    Generator that, given a source image and keypoints, tries to transform the image according to movement trajectories
    induced by the keypoints. The generator follows the Johnson architecture.
"""
+
def __init__(self,
num_channels,
num_kp,
@@ -43,28 +44,31 @@ def __init__(self,
if mobile_net:
self.first = nn.Sequential(
- SameBlock2d(num_channels,
- num_channels,
- kernel_size=3,
- padding=1,
- mobile_net=mobile_net),
- SameBlock2d(num_channels,
- num_channels,
- kernel_size=3,
- padding=1,
- mobile_net=mobile_net),
- SameBlock2d(num_channels,
- block_expansion,
- kernel_size=3,
- padding=1,
- mobile_net=mobile_net)
- )
+ SameBlock2d(
+ num_channels,
+ num_channels,
+ kernel_size=3,
+ padding=1,
+ mobile_net=mobile_net),
+ SameBlock2d(
+ num_channels,
+ num_channels,
+ kernel_size=3,
+ padding=1,
+ mobile_net=mobile_net),
+ SameBlock2d(
+ num_channels,
+ block_expansion,
+ kernel_size=3,
+ padding=1,
+ mobile_net=mobile_net))
else:
- self.first = SameBlock2d(num_channels,
- block_expansion,
- kernel_size=(7, 7),
- padding=(3, 3),
- mobile_net=mobile_net)
+ self.first = SameBlock2d(
+ num_channels,
+ block_expansion,
+ kernel_size=(7, 7),
+ padding=(3, 3),
+ mobile_net=mobile_net)
down_blocks = []
if mobile_net:
@@ -72,19 +76,21 @@ def __init__(self,
in_features = min(max_features, block_expansion * (2**i))
out_features = min(max_features, block_expansion * (2**(i + 1)))
down_blocks.append(
- MobileDownBlock2d(in_features,
- out_features,
- kernel_size=(3, 3),
- padding=(1, 1)))
+ MobileDownBlock2d(
+ in_features,
+ out_features,
+ kernel_size=(3, 3),
+ padding=(1, 1)))
else:
for i in range(num_down_blocks):
in_features = min(max_features, block_expansion * (2**i))
out_features = min(max_features, block_expansion * (2**(i + 1)))
down_blocks.append(
- DownBlock2d(in_features,
- out_features,
- kernel_size=(3, 3),
- padding=(1, 1)))
+ DownBlock2d(
+ in_features,
+ out_features,
+ kernel_size=(3, 3),
+ padding=(1, 1)))
self.down_blocks = nn.LayerList(down_blocks)
up_blocks = []
@@ -92,26 +98,26 @@ def __init__(self,
for i in range(num_down_blocks):
in_features = min(max_features,
block_expansion * (2**(num_down_blocks - i)))
- out_features = min(
- max_features,
- block_expansion * (2**(num_down_blocks - i - 1)))
+ out_features = min(max_features, block_expansion *
+ (2**(num_down_blocks - i - 1)))
up_blocks.append(
- MobileUpBlock2d(in_features,
- out_features,
- kernel_size=(3, 3),
- padding=(1, 1)))
+ MobileUpBlock2d(
+ in_features,
+ out_features,
+ kernel_size=(3, 3),
+ padding=(1, 1)))
else:
for i in range(num_down_blocks):
in_features = min(max_features,
block_expansion * (2**(num_down_blocks - i)))
- out_features = min(
- max_features,
- block_expansion * (2**(num_down_blocks - i - 1)))
+ out_features = min(max_features, block_expansion *
+ (2**(num_down_blocks - i - 1)))
up_blocks.append(
- UpBlock2d(in_features,
- out_features,
- kernel_size=(3, 3),
- padding=(1, 1)))
+ UpBlock2d(
+ in_features,
+ out_features,
+ kernel_size=(3, 3),
+ padding=(1, 1)))
self.up_blocks = nn.LayerList(up_blocks)
self.bottleneck = paddle.nn.Sequential()
@@ -120,39 +126,42 @@ def __init__(self,
for i in range(num_bottleneck_blocks):
self.bottleneck.add_sublayer(
'r' + str(i),
- MobileResBlock2d(in_features,
- kernel_size=(3, 3),
- padding=(1, 1)))
+ MobileResBlock2d(
+ in_features, kernel_size=(3, 3), padding=(1, 1)))
else:
for i in range(num_bottleneck_blocks):
self.bottleneck.add_sublayer(
'r' + str(i),
- ResBlock2d(in_features, kernel_size=(3, 3), padding=(1, 1)))
+ ResBlock2d(
+ in_features, kernel_size=(3, 3), padding=(1, 1)))
if mobile_net:
self.final = nn.Sequential(
- nn.Conv2D(block_expansion,
- block_expansion,
- kernel_size=3,
- weight_attr=nn.initializer.KaimingUniform(),
- padding=1),
- nn.ReLU(),
- nn.Conv2D(block_expansion,
- block_expansion,
- kernel_size=3,
- weight_attr=nn.initializer.KaimingUniform(),
- padding=1),
- nn.ReLU(),
- nn.Conv2D(block_expansion,
- num_channels,
- kernel_size=3,
- weight_attr=nn.initializer.KaimingUniform(),
- padding=1)
- )
+ nn.Conv2D(
+ block_expansion,
+ block_expansion,
+ kernel_size=3,
+ weight_attr=nn.initializer.KaimingUniform(),
+ padding=1),
+ nn.ReLU(),
+ nn.Conv2D(
+ block_expansion,
+ block_expansion,
+ kernel_size=3,
+ weight_attr=nn.initializer.KaimingUniform(),
+ padding=1),
+ nn.ReLU(),
+ nn.Conv2D(
+ block_expansion,
+ num_channels,
+ kernel_size=3,
+ weight_attr=nn.initializer.KaimingUniform(),
+ padding=1))
else:
- self.final = nn.Conv2D(block_expansion,
- num_channels,
- kernel_size=(7, 7),
- padding=(3, 3))
+ self.final = nn.Conv2D(
+ block_expansion,
+ num_channels,
+ kernel_size=(7, 7),
+ padding=(3, 3))
self.estimate_occlusion_map = estimate_occlusion_map
self.num_channels = num_channels
self.inference = inference
@@ -164,30 +173,30 @@ def deform_input(self, inp, deformation):
_, _, h, w = inp.shape
if h_old != h or w_old != w:
deformation = deformation.transpose([0, 3, 1, 2])
- deformation = F.interpolate(deformation,
- size=(h, w),
- mode='bilinear',
- align_corners=False)
+ deformation = F.interpolate(
+ deformation, size=(h, w), mode='bilinear', align_corners=False)
deformation = deformation.transpose([0, 2, 3, 1])
if self.inference:
identity_grid = make_coordinate_grid((h, w), type=inp.dtype)
identity_grid = identity_grid.reshape([1, h, w, 2])
visualization_matrix = np.zeros((h, w)).astype("float32")
- visualization_matrix[self.pad:h - self.pad,
- self.pad:w - self.pad] = 1.0
+ visualization_matrix[self.pad:h - self.pad, self.pad:w -
+ self.pad] = 1.0
gauss_kernel = paddle.to_tensor(
- cv2.GaussianBlur(visualization_matrix, (9, 9),
- 0.0,
- borderType=cv2.BORDER_ISOLATED))
+ cv2.GaussianBlur(
+ visualization_matrix, (9, 9),
+ 0.0,
+ borderType=cv2.BORDER_ISOLATED))
gauss_kernel = gauss_kernel.unsqueeze(0).unsqueeze(-1)
- deformation = gauss_kernel * deformation + (
- 1 - gauss_kernel) * identity_grid
+ deformation = gauss_kernel * deformation + (1 - gauss_kernel
+ ) * identity_grid
- return F.grid_sample(inp,
- deformation,
- mode='bilinear',
- padding_mode='zeros',
- align_corners=True)
+ return F.grid_sample(
+ inp,
+ deformation,
+ mode='bilinear',
+ padding_mode='zeros',
+ align_corners=True)
def forward(self, source_image, kp_driving, kp_source):
# Encoding (downsampling) part
@@ -198,9 +207,10 @@ def forward(self, source_image, kp_driving, kp_source):
# Transforming feature representation according to deformation and occlusion
output_dict = {}
if self.dense_motion_network is not None:
- dense_motion = self.dense_motion_network(source_image=source_image,
- kp_driving=kp_driving,
- kp_source=kp_source)
+ dense_motion = self.dense_motion_network(
+ source_image=source_image,
+ kp_driving=kp_driving,
+ kp_source=kp_source)
output_dict['mask'] = dense_motion['mask']
output_dict['sparse_deformed'] = dense_motion['sparse_deformed']
@@ -215,10 +225,11 @@ def forward(self, source_image, kp_driving, kp_source):
if occlusion_map is not None:
if out.shape[2] != occlusion_map.shape[2] or out.shape[
3] != occlusion_map.shape[3]:
- occlusion_map = F.interpolate(occlusion_map,
- size=out.shape[2:],
- mode='bilinear',
- align_corners=False)
+ occlusion_map = F.interpolate(
+ occlusion_map,
+ size=out.shape[2:],
+ mode='bilinear',
+ align_corners=False)
if self.inference and not self.mobile_net:
h, w = occlusion_map.shape[2:]
occlusion_map[:, :, 0:self.pad, :] = 1.0
diff --git a/paddlers/models/ppgan/models/generators/pan.py b/paddlers/models/ppgan/models/generators/pan.py
index 35624274..43bd7c37 100644
--- a/paddlers/models/ppgan/models/generators/pan.py
+++ b/paddlers/models/ppgan/models/generators/pan.py
@@ -41,6 +41,7 @@ def make_multi_blocks(func, num_layers):
class PA(nn.Layer):
'''PA is pixel attention'''
+
def __init__(self, nf):
super(PA, self).__init__()
@@ -62,16 +63,18 @@ def __init__(self, nf, k_size=3):
super(PAConv, self).__init__()
self.k2 = nn.Conv2D(nf, nf, 1) # 1x1 convolution nf->nf
self.sigmoid = nn.Sigmoid()
- self.k3 = nn.Conv2D(nf,
- nf,
- kernel_size=k_size,
- padding=(k_size - 1) // 2,
- bias_attr=False) # 3x3 convolution
- self.k4 = nn.Conv2D(nf,
- nf,
- kernel_size=k_size,
- padding=(k_size - 1) // 2,
- bias_attr=False) # 3x3 convolution
+ self.k3 = nn.Conv2D(
+ nf,
+ nf,
+ kernel_size=k_size,
+ padding=(k_size - 1) // 2,
+ bias_attr=False) # 3x3 convolution
+ self.k4 = nn.Conv2D(
+ nf,
+ nf,
+ kernel_size=k_size,
+ padding=(k_size - 1) // 2,
+ bias_attr=False) # 3x3 convolution
def forward(self, x):
@@ -88,34 +91,30 @@ class SCPA(nn.Layer):
"""
SCPA is modified from SCNet (Jiang-Jiang Liu et al. Improving Convolutional Networks with Self-Calibrated Convolutions. In CVPR, 2020)
"""
+
def __init__(self, nf, reduction=2, stride=1, dilation=1):
super(SCPA, self).__init__()
group_width = nf // reduction
- self.conv1_a = nn.Conv2D(nf,
- group_width,
- kernel_size=1,
- bias_attr=False)
- self.conv1_b = nn.Conv2D(nf,
- group_width,
- kernel_size=1,
- bias_attr=False)
+ self.conv1_a = nn.Conv2D(
+ nf, group_width, kernel_size=1, bias_attr=False)
+ self.conv1_b = nn.Conv2D(
+ nf, group_width, kernel_size=1, bias_attr=False)
self.k1 = nn.Sequential(
- nn.Conv2D(group_width,
- group_width,
- kernel_size=3,
- stride=stride,
- padding=dilation,
- dilation=dilation,
- bias_attr=False))
+ nn.Conv2D(
+ group_width,
+ group_width,
+ kernel_size=3,
+ stride=stride,
+ padding=dilation,
+ dilation=dilation,
+ bias_attr=False))
self.PAConv = PAConv(group_width)
- self.conv3 = nn.Conv2D(group_width * reduction,
- nf,
- kernel_size=1,
- bias_attr=False)
+ self.conv3 = nn.Conv2D(
+ group_width * reduction, nf, kernel_size=1, bias_attr=False)
self.lrelu = nn.LeakyReLU(negative_slope=0.2)
@@ -174,24 +173,25 @@ def forward(self, x):
if self.scale == 2 or self.scale == 3:
fea = self.upconv1(
- F.interpolate(fea, scale_factor=self.scale, mode='nearest'))
+ F.interpolate(
+ fea, scale_factor=self.scale, mode='nearest'))
fea = self.lrelu(self.att1(fea))
fea = self.lrelu(self.HRconv1(fea))
elif self.scale == 4:
fea = self.upconv1(
- F.interpolate(fea, scale_factor=2, mode='nearest'))
+ F.interpolate(
+ fea, scale_factor=2, mode='nearest'))
fea = self.lrelu(self.att1(fea))
fea = self.lrelu(self.HRconv1(fea))
fea = self.upconv2(
- F.interpolate(fea, scale_factor=2, mode='nearest'))
+ F.interpolate(
+ fea, scale_factor=2, mode='nearest'))
fea = self.lrelu(self.att2(fea))
fea = self.lrelu(self.HRconv2(fea))
out = self.conv_last(fea)
- ILR = F.interpolate(x,
- scale_factor=self.scale,
- mode='bilinear',
- align_corners=False)
+ ILR = F.interpolate(
+ x, scale_factor=self.scale, mode='bilinear', align_corners=False)
out = out + ILR
return out
diff --git a/paddlers/models/ppgan/models/generators/remaster.py b/paddlers/models/ppgan/models/generators/remaster.py
index 80340216..ce64377e 100644
--- a/paddlers/models/ppgan/models/generators/remaster.py
+++ b/paddlers/models/ppgan/models/generators/remaster.py
@@ -25,11 +25,12 @@ def __init__(self,
stride=(1, 1, 1),
padding=(0, 1, 1)):
super(TempConv, self).__init__()
- self.conv3d = nn.Conv3D(in_planes,
- out_planes,
- kernel_size=kernel_size,
- stride=stride,
- padding=padding)
+ self.conv3d = nn.Conv3D(
+ in_planes,
+ out_planes,
+ kernel_size=kernel_size,
+ stride=stride,
+ padding=padding)
self.bn = nn.BatchNorm(out_planes)
def forward(self, x):
@@ -40,11 +41,12 @@ class Upsample(nn.Layer):
def __init__(self, in_planes, out_planes, scale_factor=(1, 2, 2)):
super(Upsample, self).__init__()
self.scale_factor = scale_factor
- self.conv3d = nn.Conv3D(in_planes,
- out_planes,
- kernel_size=(3, 3, 3),
- stride=(1, 1, 1),
- padding=(1, 1, 1))
+ self.conv3d = nn.Conv3D(
+ in_planes,
+ out_planes,
+ kernel_size=(3, 3, 3),
+ stride=(1, 1, 1),
+ padding=(1, 1, 1))
self.bn = nn.BatchNorm(out_planes)
def forward(self, x):
@@ -55,22 +57,24 @@ def forward(self, x):
return F.elu(
self.bn(
self.conv3d(
- F.interpolate(x,
- size=out_size,
- mode='trilinear',
- align_corners=False,
- data_format='NCDHW',
- align_mode=0))))
+ F.interpolate(
+ x,
+ size=out_size,
+ mode='trilinear',
+ align_corners=False,
+ data_format='NCDHW',
+ align_mode=0))))
class UpsampleConcat(nn.Layer):
def __init__(self, in_planes_up, in_planes_flat, out_planes):
super(UpsampleConcat, self).__init__()
- self.conv3d = TempConv(in_planes_up + in_planes_flat,
- out_planes,
- kernel_size=(3, 3, 3),
- stride=(1, 1, 1),
- padding=(1, 1, 1))
+ self.conv3d = TempConv(
+ in_planes_up + in_planes_flat,
+ out_planes,
+ kernel_size=(3, 3, 3),
+ stride=(1, 1, 1),
+ padding=(1, 1, 1))
def forward(self, x1, x2):
scale_factor = (1, 2, 2)
@@ -78,12 +82,13 @@ def forward(self, x1, x2):
for i in range(3):
out_size[i] = scale_factor[i] * out_size[i]
- x1 = F.interpolate(x1,
- size=out_size,
- mode='trilinear',
- align_corners=False,
- data_format='NCDHW',
- align_mode=0)
+ x1 = F.interpolate(
+ x1,
+ size=out_size,
+ mode='trilinear',
+ align_corners=False,
+ data_format='NCDHW',
+ align_mode=0)
x = paddle.concat([x1, x2], axis=1)
return self.conv3d(x)
@@ -98,17 +103,19 @@ class SourceReferenceAttention(nn.Layer):
in_planes_r (int): Number of input reference feature vector channels.
"""
+
def __init__(self, in_planes_s, in_planes_r):
super(SourceReferenceAttention, self).__init__()
- self.query_conv = nn.Conv3D(in_channels=in_planes_s,
- out_channels=in_planes_s // 8,
- kernel_size=1)
- self.key_conv = nn.Conv3D(in_channels=in_planes_r,
- out_channels=in_planes_r // 8,
- kernel_size=1)
- self.value_conv = nn.Conv3D(in_channels=in_planes_r,
- out_channels=in_planes_r,
- kernel_size=1)
+ self.query_conv = nn.Conv3D(
+ in_channels=in_planes_s,
+ out_channels=in_planes_s // 8,
+ kernel_size=1)
+ self.key_conv = nn.Conv3D(
+ in_channels=in_planes_r,
+ out_channels=in_planes_r // 8,
+ kernel_size=1)
+ self.value_conv = nn.Conv3D(
+ in_channels=in_planes_r, out_channels=in_planes_r, kernel_size=1)
self.gamma = self.create_parameter(
shape=[1],
dtype=self.query_conv.weight.dtype,
@@ -118,16 +125,16 @@ def forward(self, source, reference):
s_batchsize, sC, sT, sH, sW = source.shape
r_batchsize, rC, rT, rH, rW = reference.shape
- proj_query = paddle.reshape(self.query_conv(source),
- [s_batchsize, -1, sT * sH * sW])
+ proj_query = paddle.reshape(
+ self.query_conv(source), [s_batchsize, -1, sT * sH * sW])
proj_query = paddle.transpose(proj_query, [0, 2, 1])
- proj_key = paddle.reshape(self.key_conv(reference),
- [r_batchsize, -1, rT * rW * rH])
+ proj_key = paddle.reshape(
+ self.key_conv(reference), [r_batchsize, -1, rT * rW * rH])
energy = paddle.bmm(proj_query, proj_key)
attention = F.softmax(energy)
- proj_value = paddle.reshape(self.value_conv(reference),
- [r_batchsize, -1, rT * rH * rW])
+ proj_value = paddle.reshape(
+ self.value_conv(reference), [r_batchsize, -1, rT * rH * rW])
out = paddle.bmm(proj_value, paddle.transpose(attention, [0, 2, 1]))
out = paddle.reshape(out, [s_batchsize, sC, sT, sH, sW])
@@ -140,32 +147,44 @@ def __init__(self):
super(NetworkR, self).__init__()
self.layers = nn.Sequential(
- nn.Pad3D((1, 1, 1, 1, 1, 1), mode='replicate'),
- TempConv(1,
- 64,
- kernel_size=(3, 3, 3),
- stride=(1, 2, 2),
- padding=(0, 0, 0)),
- TempConv(64, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
- TempConv(128, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
- TempConv(128,
- 256,
- kernel_size=(3, 3, 3),
- stride=(1, 2, 2),
- padding=(1, 1, 1)),
- TempConv(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
- TempConv(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
- TempConv(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
- TempConv(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
+ nn.Pad3D(
+ (1, 1, 1, 1, 1, 1), mode='replicate'),
+ TempConv(
+ 1,
+ 64,
+ kernel_size=(3, 3, 3),
+ stride=(1, 2, 2),
+ padding=(0, 0, 0)),
+ TempConv(
+ 64, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
+ TempConv(
+ 128, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
+ TempConv(
+ 128,
+ 256,
+ kernel_size=(3, 3, 3),
+ stride=(1, 2, 2),
+ padding=(1, 1, 1)),
+ TempConv(
+ 256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
+ TempConv(
+ 256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
+ TempConv(
+ 256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
+ TempConv(
+ 256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
Upsample(256, 128),
- TempConv(128, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
- TempConv(64, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
+ TempConv(
+ 128, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
+ TempConv(
+ 64, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1)),
Upsample(64, 16),
- nn.Conv3D(16,
- 1,
- kernel_size=(3, 3, 3),
- stride=(1, 1, 1),
- padding=(1, 1, 1)))
+ nn.Conv3D(
+ 16,
+ 1,
+ kernel_size=(3, 3, 3),
+ stride=(1, 1, 1),
+ padding=(1, 1, 1)))
def forward(self, x):
return paddle.clip(
@@ -177,17 +196,25 @@ def __init__(self):
super(NetworkC, self).__init__()
self.down1 = nn.Sequential(
- nn.Pad3D((1, 1, 1, 1, 0, 0), mode='replicate'),
- TempConv(1, 64, stride=(1, 2, 2), padding=(0, 0, 0)),
- TempConv(64, 128), TempConv(128, 128),
- TempConv(128, 256, stride=(1, 2, 2)), TempConv(256, 256),
- TempConv(256, 256), TempConv(256, 512, stride=(1, 2, 2)),
- TempConv(512, 512), TempConv(512, 512))
+ nn.Pad3D(
+ (1, 1, 1, 1, 0, 0), mode='replicate'),
+ TempConv(
+ 1, 64, stride=(1, 2, 2), padding=(0, 0, 0)),
+ TempConv(64, 128),
+ TempConv(128, 128),
+ TempConv(
+ 128, 256, stride=(1, 2, 2)),
+ TempConv(256, 256),
+ TempConv(256, 256),
+ TempConv(
+ 256, 512, stride=(1, 2, 2)),
+ TempConv(512, 512),
+ TempConv(512, 512))
self.flat = nn.Sequential(TempConv(512, 512), TempConv(512, 512))
self.down2 = nn.Sequential(
- TempConv(512, 512, stride=(1, 2, 2)),
- TempConv(512, 512),
- )
+ TempConv(
+ 512, 512, stride=(1, 2, 2)),
+ TempConv(512, 512), )
self.stattn1 = SourceReferenceAttention(
512, 512) # Source-Reference Attention
self.stattn2 = SourceReferenceAttention(
@@ -196,48 +223,54 @@ def __init__(self):
self.conv1 = TempConv(512, 512)
self.up1 = UpsampleConcat(512, 512, 512) # 1/8
self.selfattn2 = SourceReferenceAttention(512, 512) # Self Attention
- self.conv2 = TempConv(512,
- 256,
- kernel_size=(3, 3, 3),
- stride=(1, 1, 1),
- padding=(1, 1, 1))
+ self.conv2 = TempConv(
+ 512,
+ 256,
+ kernel_size=(3, 3, 3),
+ stride=(1, 1, 1),
+ padding=(1, 1, 1))
self.up2 = nn.Sequential(
Upsample(256, 128), # 1/4
- TempConv(128,
- 64,
- kernel_size=(3, 3, 3),
- stride=(1, 1, 1),
- padding=(1, 1, 1)))
+ TempConv(
+ 128,
+ 64,
+ kernel_size=(3, 3, 3),
+ stride=(1, 1, 1),
+ padding=(1, 1, 1)))
self.up3 = nn.Sequential(
Upsample(64, 32), # 1/2
- TempConv(32,
- 16,
- kernel_size=(3, 3, 3),
- stride=(1, 1, 1),
- padding=(1, 1, 1)))
+ TempConv(
+ 32,
+ 16,
+ kernel_size=(3, 3, 3),
+ stride=(1, 1, 1),
+ padding=(1, 1, 1)))
self.up4 = nn.Sequential(
Upsample(16, 8), # 1/1
- nn.Conv3D(8,
- 2,
- kernel_size=(3, 3, 3),
- stride=(1, 1, 1),
- padding=(1, 1, 1)))
+ nn.Conv3D(
+ 8,
+ 2,
+ kernel_size=(3, 3, 3),
+ stride=(1, 1, 1),
+ padding=(1, 1, 1)))
self.reffeatnet1 = nn.Sequential(
- TempConv(3, 64, stride=(1, 2, 2)),
+ TempConv(
+ 3, 64, stride=(1, 2, 2)),
TempConv(64, 128),
TempConv(128, 128),
- TempConv(128, 256, stride=(1, 2, 2)),
+ TempConv(
+ 128, 256, stride=(1, 2, 2)),
TempConv(256, 256),
TempConv(256, 256),
- TempConv(256, 512, stride=(1, 2, 2)),
+ TempConv(
+ 256, 512, stride=(1, 2, 2)),
TempConv(512, 512),
- TempConv(512, 512),
- )
+ TempConv(512, 512), )
self.reffeatnet2 = nn.Sequential(
- TempConv(512, 512, stride=(1, 2, 2)),
- TempConv(512, 512),
+ TempConv(
+ 512, 512, stride=(1, 2, 2)),
TempConv(512, 512),
- )
+ TempConv(512, 512), )
def forward(self, x, x_refs=None):
x1 = self.down1(x - 0.4462414)
diff --git a/paddlers/models/ppgan/models/generators/resnet.py b/paddlers/models/ppgan/models/generators/resnet.py
index 3c30d1ae..9ce93c37 100644
--- a/paddlers/models/ppgan/models/generators/resnet.py
+++ b/paddlers/models/ppgan/models/generators/resnet.py
@@ -39,6 +39,7 @@ class ResnetGenerator(nn.Layer):
padding_type (str): the name of padding layer in conv layers: reflect | replicate | zero
"""
+
def __init__(self,
input_nc,
output_nc,
@@ -58,52 +59,48 @@ def __init__(self,
use_bias = norm_layer == nn.InstanceNorm2D
model = [
- nn.Pad2D(padding=[3, 3, 3, 3], mode="reflect"),
- nn.Conv2D(input_nc,
- ngf,
- kernel_size=7,
- padding=0,
- bias_attr=use_bias),
- norm_layer(ngf),
- nn.ReLU()
+ nn.Pad2D(
+ padding=[3, 3, 3, 3], mode="reflect"), nn.Conv2D(
+ input_nc, ngf, kernel_size=7, padding=0,
+ bias_attr=use_bias), norm_layer(ngf), nn.ReLU()
]
n_downsampling = 2
for i in range(n_downsampling): # add downsampling layers
mult = 2**i
model += [
- nn.Conv2D(ngf * mult,
- ngf * mult * 2,
- kernel_size=3,
- stride=2,
- padding=1,
- bias_attr=use_bias),
- norm_layer(ngf * mult * 2),
- nn.ReLU()
+ nn.Conv2D(
+ ngf * mult,
+ ngf * mult * 2,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ bias_attr=use_bias), norm_layer(ngf * mult * 2), nn.ReLU()
]
mult = 2**n_downsampling
for i in range(n_blocks): # add ResNet blocks
model += [
- ResnetBlock(ngf * mult,
- padding_type=padding_type,
- norm_layer=norm_layer,
- use_dropout=use_dropout,
- use_bias=use_bias)
+ ResnetBlock(
+ ngf * mult,
+ padding_type=padding_type,
+ norm_layer=norm_layer,
+ use_dropout=use_dropout,
+ use_bias=use_bias)
]
for i in range(n_downsampling): # add upsampling layers
mult = 2**(n_downsampling - i)
model += [
- nn.Conv2DTranspose(ngf * mult,
- int(ngf * mult / 2),
- kernel_size=3,
- stride=2,
- padding=1,
- output_padding=1,
- bias_attr=use_bias),
- norm_layer(int(ngf * mult / 2)),
+ nn.Conv2DTranspose(
+ ngf * mult,
+ int(ngf * mult / 2),
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ output_padding=1,
+ bias_attr=use_bias), norm_layer(int(ngf * mult / 2)),
nn.ReLU()
]
model += [nn.Pad2D(padding=[3, 3, 3, 3], mode="reflect")]
@@ -119,6 +116,7 @@ def forward(self, x):
class ResnetBlock(nn.Layer):
"""Define a Resnet block"""
+
def __init__(self, dim, padding_type, norm_layer, use_dropout, use_bias):
"""Initialize the Resnet block
@@ -155,9 +153,9 @@ def build_conv_block(self, dim, padding_type, norm_layer, use_dropout,
padding_type)
conv_block += [
- nn.Conv2D(dim, dim, kernel_size=3, padding=p, bias_attr=use_bias),
- norm_layer(dim),
- nn.ReLU()
+ nn.Conv2D(
+ dim, dim, kernel_size=3, padding=p, bias_attr=use_bias),
+ norm_layer(dim), nn.ReLU()
]
if use_dropout:
conv_block += [nn.Dropout(0.5)]
@@ -171,7 +169,8 @@ def build_conv_block(self, dim, padding_type, norm_layer, use_dropout,
raise NotImplementedError('padding [%s] is not implemented' %
padding_type)
conv_block += [
- nn.Conv2D(dim, dim, kernel_size=3, padding=p, bias_attr=use_bias),
+ nn.Conv2D(
+ dim, dim, kernel_size=3, padding=p, bias_attr=use_bias),
norm_layer(dim)
]
diff --git a/paddlers/models/ppgan/models/generators/rrdb_net.py b/paddlers/models/ppgan/models/generators/rrdb_net.py
index f69d42b3..639faa2d 100644
--- a/paddlers/models/ppgan/models/generators/rrdb_net.py
+++ b/paddlers/models/ppgan/models/generators/rrdb_net.py
@@ -44,6 +44,7 @@ def forward(self, x):
class RRDB(nn.Layer):
'''Residual in Residual Dense Block'''
+
def __init__(self, nf, gc=32):
super(RRDB, self).__init__()
self.RDB1 = ResidualDenseBlock_5C(nf, gc)
@@ -87,9 +88,11 @@ def forward(self, x):
fea = fea + trunk
fea = self.lrelu(
- self.upconv1(F.interpolate(fea, scale_factor=2, mode='nearest')))
+ self.upconv1(F.interpolate(
+ fea, scale_factor=2, mode='nearest')))
fea = self.lrelu(
- self.upconv2(F.interpolate(fea, scale_factor=2, mode='nearest')))
+ self.upconv2(F.interpolate(
+ fea, scale_factor=2, mode='nearest')))
out = self.conv_last(self.lrelu(self.HRconv(fea)))
return out
diff --git a/paddlers/models/ppgan/models/generators/unet.py b/paddlers/models/ppgan/models/generators/unet.py
index f8c2a1b1..bb670bdd 100644
--- a/paddlers/models/ppgan/models/generators/unet.py
+++ b/paddlers/models/ppgan/models/generators/unet.py
@@ -24,6 +24,7 @@
@GENERATORS.register()
class UnetGenerator(nn.Layer):
"""Create a Unet-based generator"""
+
def __init__(self,
input_nc,
output_nc,
@@ -57,28 +58,32 @@ def __init__(self,
innermost=True) # add the innermost layer
for i in range(num_downs -
5): # add intermediate layers with ngf * 8 filters
- unet_block = UnetSkipConnectionBlock(ngf * 8,
- ngf * 8,
- input_nc=None,
- submodule=unet_block,
- norm_layer=norm_layer,
- use_dropout=use_dropout)
+ unet_block = UnetSkipConnectionBlock(
+ ngf * 8,
+ ngf * 8,
+ input_nc=None,
+ submodule=unet_block,
+ norm_layer=norm_layer,
+ use_dropout=use_dropout)
# gradually reduce the number of filters from ngf * 8 to ngf
- unet_block = UnetSkipConnectionBlock(ngf * 4,
- ngf * 8,
- input_nc=None,
- submodule=unet_block,
- norm_layer=norm_layer)
- unet_block = UnetSkipConnectionBlock(ngf * 2,
- ngf * 4,
- input_nc=None,
- submodule=unet_block,
- norm_layer=norm_layer)
- unet_block = UnetSkipConnectionBlock(ngf,
- ngf * 2,
- input_nc=None,
- submodule=unet_block,
- norm_layer=norm_layer)
+ unet_block = UnetSkipConnectionBlock(
+ ngf * 4,
+ ngf * 8,
+ input_nc=None,
+ submodule=unet_block,
+ norm_layer=norm_layer)
+ unet_block = UnetSkipConnectionBlock(
+ ngf * 2,
+ ngf * 4,
+ input_nc=None,
+ submodule=unet_block,
+ norm_layer=norm_layer)
+ unet_block = UnetSkipConnectionBlock(
+ ngf,
+ ngf * 2,
+ input_nc=None,
+ submodule=unet_block,
+ norm_layer=norm_layer)
self.model = UnetSkipConnectionBlock(
output_nc,
ngf,
@@ -97,6 +102,7 @@ class UnetSkipConnectionBlock(nn.Layer):
X -------------------identity----------------------
|-- downsampling -- |submodule| -- upsampling --|
"""
+
def __init__(self,
outer_nc,
inner_nc,
@@ -126,43 +132,43 @@ def __init__(self,
use_bias = norm_layer == nn.InstanceNorm2D
if input_nc is None:
input_nc = outer_nc
- downconv = nn.Conv2D(input_nc,
- inner_nc,
- kernel_size=4,
- stride=2,
- padding=1,
- bias_attr=use_bias)
+ downconv = nn.Conv2D(
+ input_nc,
+ inner_nc,
+ kernel_size=4,
+ stride=2,
+ padding=1,
+ bias_attr=use_bias)
downrelu = nn.LeakyReLU(0.2)
downnorm = norm_layer(inner_nc)
uprelu = nn.ReLU()
upnorm = norm_layer(outer_nc)
if outermost:
- upconv = nn.Conv2DTranspose(inner_nc * 2,
- outer_nc,
- kernel_size=4,
- stride=2,
- padding=1)
+ upconv = nn.Conv2DTranspose(
+ inner_nc * 2, outer_nc, kernel_size=4, stride=2, padding=1)
down = [downconv]
up = [uprelu, upconv, nn.Tanh()]
model = down + [submodule] + up
elif innermost:
- upconv = nn.Conv2DTranspose(inner_nc,
- outer_nc,
- kernel_size=4,
- stride=2,
- padding=1,
- bias_attr=use_bias)
+ upconv = nn.Conv2DTranspose(
+ inner_nc,
+ outer_nc,
+ kernel_size=4,
+ stride=2,
+ padding=1,
+ bias_attr=use_bias)
down = [downrelu, downconv]
up = [uprelu, upconv, upnorm]
model = down + up
else:
- upconv = nn.Conv2DTranspose(inner_nc * 2,
- outer_nc,
- kernel_size=4,
- stride=2,
- padding=1,
- bias_attr=use_bias)
+ upconv = nn.Conv2DTranspose(
+ inner_nc * 2,
+ outer_nc,
+ kernel_size=4,
+ stride=2,
+ padding=1,
+ bias_attr=use_bias)
down = [downrelu, downconv, downnorm]
up = [uprelu, upconv, upnorm]
diff --git a/paddlers/models/ppgan/models/generators/wav2lip.py b/paddlers/models/ppgan/models/generators/wav2lip.py
index 5c8b0c94..33ee0f87 100644
--- a/paddlers/models/ppgan/models/generators/wav2lip.py
+++ b/paddlers/models/ppgan/models/generators/wav2lip.py
@@ -16,271 +16,282 @@ class Wav2Lip(nn.Layer):
def __init__(self):
super(Wav2Lip, self).__init__()
- self.face_encoder_blocks = nn.LayerList([
- nn.Sequential(ConvBNRelu(6, 16, kernel_size=7, stride=1,
- padding=3)),
- nn.Sequential(
- ConvBNRelu(16, 32, kernel_size=3, stride=2, padding=1),
- ConvBNRelu(32,
- 32,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(32,
- 32,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True)),
- nn.Sequential(
- ConvBNRelu(32, 64, kernel_size=3, stride=2, padding=1),
- ConvBNRelu(64,
- 64,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(64,
- 64,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(64,
- 64,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True)),
- nn.Sequential(
- ConvBNRelu(64, 128, kernel_size=3, stride=2, padding=1),
- ConvBNRelu(128,
- 128,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(128,
- 128,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True)),
- nn.Sequential(
- ConvBNRelu(128, 256, kernel_size=3, stride=2, padding=1),
- ConvBNRelu(256,
- 256,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(256,
- 256,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True)),
- nn.Sequential(
- ConvBNRelu(256, 512, kernel_size=3, stride=2, padding=1),
- ConvBNRelu(512,
- 512,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ),
- nn.Sequential(
- ConvBNRelu(512, 512, kernel_size=3, stride=1, padding=0),
- ConvBNRelu(512, 512, kernel_size=1, stride=1, padding=0)),
- ])
+ self.face_encoder_blocks = nn.LayerList(
+ [
+ nn.Sequential(
+ ConvBNRelu(
+ 6, 16, kernel_size=7, stride=1, padding=3)),
+ nn.Sequential(
+ ConvBNRelu(
+ 16, 32, kernel_size=3, stride=2, padding=1),
+ ConvBNRelu(
+ 32,
+ 32,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ residual=True),
+ ConvBNRelu(
+ 32,
+ 32,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ residual=True)),
+ nn.Sequential(
+ ConvBNRelu(
+ 32, 64, kernel_size=3, stride=2, padding=1),
+ ConvBNRelu(
+ 64,
+ 64,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ residual=True),
+ ConvBNRelu(
+ 64,
+ 64,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ residual=True),
+ ConvBNRelu(
+ 64,
+ 64,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ residual=True)),
+ nn.Sequential(
+ ConvBNRelu(
+ 64, 128, kernel_size=3, stride=2, padding=1),
+ ConvBNRelu(
+ 128,
+ 128,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ residual=True),
+ ConvBNRelu(
+ 128,
+ 128,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ residual=True)),
+ nn.Sequential(
+ ConvBNRelu(
+ 128, 256, kernel_size=3, stride=2, padding=1),
+ ConvBNRelu(
+ 256,
+ 256,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ residual=True),
+ ConvBNRelu(
+ 256,
+ 256,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ residual=True)),
+ nn.Sequential(
+ ConvBNRelu(
+ 256, 512, kernel_size=3, stride=2, padding=1),
+ ConvBNRelu(
+ 512,
+ 512,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ residual=True), ),
+ nn.Sequential(
+ ConvBNRelu(
+ 512, 512, kernel_size=3, stride=1, padding=0),
+ ConvBNRelu(
+ 512, 512, kernel_size=1, stride=1, padding=0)),
+ ])
self.audio_encoder = nn.Sequential(
- ConvBNRelu(1, 32, kernel_size=3, stride=1, padding=1),
- ConvBNRelu(32,
- 32,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(32,
- 32,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(32, 64, kernel_size=3, stride=(3, 1), padding=1),
- ConvBNRelu(64,
- 64,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(64,
- 64,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(64, 128, kernel_size=3, stride=3, padding=1),
- ConvBNRelu(128,
- 128,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(128,
- 128,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(128, 256, kernel_size=3, stride=(3, 2), padding=1),
- ConvBNRelu(256,
- 256,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(256, 512, kernel_size=3, stride=1, padding=0),
- ConvBNRelu(512, 512, kernel_size=1, stride=1, padding=0),
- )
+ ConvBNRelu(
+ 1, 32, kernel_size=3, stride=1, padding=1),
+ ConvBNRelu(
+ 32, 32, kernel_size=3, stride=1, padding=1, residual=True),
+ ConvBNRelu(
+ 32, 32, kernel_size=3, stride=1, padding=1, residual=True),
+ ConvBNRelu(
+ 32, 64, kernel_size=3, stride=(3, 1), padding=1),
+ ConvBNRelu(
+ 64, 64, kernel_size=3, stride=1, padding=1, residual=True),
+ ConvBNRelu(
+ 64, 64, kernel_size=3, stride=1, padding=1, residual=True),
+ ConvBNRelu(
+ 64, 128, kernel_size=3, stride=3, padding=1),
+ ConvBNRelu(
+ 128, 128, kernel_size=3, stride=1, padding=1, residual=True),
+ ConvBNRelu(
+ 128, 128, kernel_size=3, stride=1, padding=1, residual=True),
+ ConvBNRelu(
+ 128, 256, kernel_size=3, stride=(3, 2), padding=1),
+ ConvBNRelu(
+ 256, 256, kernel_size=3, stride=1, padding=1, residual=True),
+ ConvBNRelu(
+ 256, 512, kernel_size=3, stride=1, padding=0),
+ ConvBNRelu(
+ 512, 512, kernel_size=1, stride=1, padding=0), )
- self.face_decoder_blocks = nn.LayerList([
- nn.Sequential(
- ConvBNRelu(512, 512, kernel_size=1, stride=1, padding=0), ),
- nn.Sequential(
- Conv2dTransposeRelu(1024,
- 512,
- kernel_size=3,
- stride=1,
- padding=0),
- ConvBNRelu(512,
- 512,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ),
- nn.Sequential(
- Conv2dTransposeRelu(1024,
- 512,
- kernel_size=3,
- stride=2,
- padding=1,
- output_padding=1),
- ConvBNRelu(512,
- 512,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(512,
- 512,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ),
- nn.Sequential(
- Conv2dTransposeRelu(768,
- 384,
- kernel_size=3,
- stride=2,
- padding=1,
- output_padding=1),
- ConvBNRelu(384,
- 384,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(384,
- 384,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ),
- nn.Sequential(
- Conv2dTransposeRelu(512,
- 256,
- kernel_size=3,
- stride=2,
- padding=1,
- output_padding=1),
- ConvBNRelu(256,
- 256,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(256,
- 256,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ),
- nn.Sequential(
- Conv2dTransposeRelu(320,
- 128,
- kernel_size=3,
- stride=2,
- padding=1,
- output_padding=1),
- ConvBNRelu(128,
- 128,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(128,
- 128,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ),
- nn.Sequential(
- Conv2dTransposeRelu(160,
- 64,
- kernel_size=3,
- stride=2,
- padding=1,
- output_padding=1),
- ConvBNRelu(64,
- 64,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ConvBNRelu(64,
- 64,
- kernel_size=3,
- stride=1,
- padding=1,
- residual=True),
- ),
- ])
+ self.face_decoder_blocks = nn.LayerList(
+ [
+ nn.Sequential(
+ ConvBNRelu(
+ 512, 512, kernel_size=1, stride=1, padding=0), ),
+ nn.Sequential(
+ Conv2dTransposeRelu(
+ 1024, 512, kernel_size=3, stride=1, padding=0),
+ ConvBNRelu(
+ 512,
+ 512,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ residual=True), ),
+ nn.Sequential(
+ Conv2dTransposeRelu(
+ 1024,
+ 512,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ output_padding=1),
+ ConvBNRelu(
+ 512,
+ 512,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ residual=True),
+ ConvBNRelu(
+ 512,
+ 512,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ residual=True), ),
+ nn.Sequential(
+ Conv2dTransposeRelu(
+ 768,
+ 384,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ output_padding=1),
+ ConvBNRelu(
+ 384,
+ 384,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ residual=True),
+ ConvBNRelu(
+ 384,
+ 384,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ residual=True), ),
+ nn.Sequential(
+ Conv2dTransposeRelu(
+ 512,
+ 256,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ output_padding=1),
+ ConvBNRelu(
+ 256,
+ 256,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ residual=True),
+ ConvBNRelu(
+ 256,
+ 256,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ residual=True), ),
+ nn.Sequential(
+ Conv2dTransposeRelu(
+ 320,
+ 128,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ output_padding=1),
+ ConvBNRelu(
+ 128,
+ 128,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ residual=True),
+ ConvBNRelu(
+ 128,
+ 128,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ residual=True), ),
+ nn.Sequential(
+ Conv2dTransposeRelu(
+ 160,
+ 64,
+ kernel_size=3,
+ stride=2,
+ padding=1,
+ output_padding=1),
+ ConvBNRelu(
+ 64,
+ 64,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ residual=True),
+ ConvBNRelu(
+ 64,
+ 64,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ residual=True), ),
+ ])
self.output_block = nn.Sequential(
- ConvBNRelu(80, 32, kernel_size=3, stride=1, padding=1),
- nn.Conv2D(32, 3, kernel_size=1, stride=1, padding=0), nn.Sigmoid())
+ ConvBNRelu(
+ 80, 32, kernel_size=3, stride=1, padding=1),
+ nn.Conv2D(
+ 32, 3, kernel_size=1, stride=1, padding=0),
+ nn.Sigmoid())
def forward(self, audio_sequences, face_sequences):
B = audio_sequences.shape[0]
input_dim_size = len(face_sequences.shape)
if input_dim_size > 4:
- audio_sequences = paddle.concat([
- audio_sequences[:, i] for i in range(audio_sequences.shape[1])
- ],
- axis=0)
- face_sequences = paddle.concat([
- face_sequences[:, :, i] for i in range(face_sequences.shape[2])
- ],
- axis=0)
+ audio_sequences = paddle.concat(
+ [
+ audio_sequences[:, i]
+ for i in range(audio_sequences.shape[1])
+ ],
+ axis=0)
+ face_sequences = paddle.concat(
+ [
+ face_sequences[:, :, i]
+ for i in range(face_sequences.shape[2])
+ ],
+ axis=0)
audio_embedding = self.audio_encoder(audio_sequences)
diff --git a/paddlers/models/ppgan/models/lapstyle_model.py b/paddlers/models/ppgan/models/lapstyle_model.py
index bf090ea4..b8985130 100644
--- a/paddlers/models/ppgan/models/lapstyle_model.py
+++ b/paddlers/models/ppgan/models/lapstyle_model.py
@@ -76,9 +76,8 @@ def backward_Dec(self):
"""content loss"""
self.loss_c = 0
for layer in self.content_layers:
- self.loss_c += self.calc_content_loss(self.tF[layer],
- self.cF[layer],
- norm=True)
+ self.loss_c += self.calc_content_loss(
+ self.tF[layer], self.cF[layer], norm=True)
self.losses['loss_c'] = self.loss_c
"""style loss"""
self.loss_s = 0
@@ -242,9 +241,8 @@ def backward_G(self):
"""content loss"""
self.loss_c = 0
for layer in self.content_layers:
- self.loss_c += self.calc_content_loss(self.tF[layer],
- self.cF[layer],
- norm=True)
+ self.loss_c += self.calc_content_loss(
+ self.tF[layer], self.cF[layer], norm=True)
self.losses['loss_c'] = self.loss_c
"""style loss"""
self.loss_s = 0
@@ -389,9 +387,8 @@ def backward_G(self):
"""content loss"""
self.loss_c = 0
for layer in self.content_layers:
- self.loss_c += self.calc_content_loss(self.tF[layer],
- self.cF[layer],
- norm=True)
+ self.loss_c += self.calc_content_loss(
+ self.tF[layer], self.cF[layer], norm=True)
self.losses['loss_c'] = self.loss_c
"""style loss"""
self.loss_s = 0
@@ -401,8 +398,8 @@ def backward_G(self):
"""relative loss"""
self.loss_style_remd = self.calc_style_emd_loss(self.tF['r41'],
self.sF['r41'])
- self.loss_content_relt = self.calc_content_relt_loss(
- self.tF['r41'], self.cF['r41'])
+ self.loss_content_relt = self.calc_content_relt_loss(self.tF['r41'],
+ self.cF['r41'])
self.losses['loss_style_remd'] = self.loss_style_remd
self.losses['loss_content_relt'] = self.loss_content_relt
"""gan loss"""
diff --git a/paddlers/models/ppgan/models/mpr_model.py b/paddlers/models/ppgan/models/mpr_model.py
index d88e8f11..fdeea4ff 100644
--- a/paddlers/models/ppgan/models/mpr_model.py
+++ b/paddlers/models/ppgan/models/mpr_model.py
@@ -29,6 +29,7 @@ class MPRModel(BaseModel):
Paper: MPR: Multi-Stage Progressive Image Restoration (CVPR 2021).
https://arxiv.org/abs/2102.02808
"""
+
def __init__(self, generator, char_criterion=None, edge_criterion=None):
"""Initialize the MPR class.
diff --git a/paddlers/models/ppgan/models/msvsr_model.py b/paddlers/models/ppgan/models/msvsr_model.py
index 3ee6fbd3..e51dd277 100644
--- a/paddlers/models/ppgan/models/msvsr_model.py
+++ b/paddlers/models/ppgan/models/msvsr_model.py
@@ -30,6 +30,7 @@ class MultiStageVSRModel(BaseSRModel):
Paper:
PP-MSVSR: Multi-Stage Video Super-Resolution, 2021
"""
+
def __init__(self, generator, fix_iter, pixel_criterion=None):
"""Initialize the PP-MSVSR class.
diff --git a/paddlers/models/ppgan/models/photopen_model.py b/paddlers/models/ppgan/models/photopen_model.py
index af1fab4e..95efad43 100644
--- a/paddlers/models/ppgan/models/photopen_model.py
+++ b/paddlers/models/ppgan/models/photopen_model.py
@@ -28,39 +28,38 @@
@MODELS.register()
class PhotoPenModel(BaseModel):
- def __init__(self,
- generator,
- discriminator,
- criterion,
- label_nc,
- contain_dontcare_label,
- batchSize,
- crop_size,
- lambda_feat,
- ):
+ def __init__(
+ self,
+ generator,
+ discriminator,
+ criterion,
+ label_nc,
+ contain_dontcare_label,
+ batchSize,
+ crop_size,
+ lambda_feat, ):
super(PhotoPenModel, self).__init__()
-
+
opt = {
- 'label_nc': label_nc,
- 'contain_dontcare_label': contain_dontcare_label,
- 'batchSize': batchSize,
- 'crop_size': crop_size,
- 'lambda_feat': lambda_feat,
-# 'semantic_nc': semantic_nc,
-# 'use_vae': use_vae,
-# 'nef': nef,
- }
+ 'label_nc': label_nc,
+ 'contain_dontcare_label': contain_dontcare_label,
+ 'batchSize': batchSize,
+ 'crop_size': crop_size,
+ 'lambda_feat': lambda_feat,
+ # 'semantic_nc': semantic_nc,
+ # 'use_vae': use_vae,
+ # 'nef': nef,
+ }
self.opt = Dict(opt)
-
-
+
# define nets
self.nets['net_gen'] = build_generator(generator)
-# init_weights(self.nets['net_gen'])
+ # init_weights(self.nets['net_gen'])
self.nets['net_des'] = build_discriminator(discriminator)
-# init_weights(self.nets['net_des'])
+ # init_weights(self.nets['net_des'])
self.net_vgg = build_criterion(criterion)
-
+
def setup_input(self, input):
if 'img' in input.keys():
self.img = paddle.to_tensor(input['img'])
@@ -77,48 +76,49 @@ def backward_G(self):
real_data = paddle.concat((self.one_hot, self.img), 1)
fake_and_real_data = paddle.concat((fake_data, real_data), 0)
pred = self.nets['net_des'](fake_and_real_data)
-
"""content loss"""
g_ganloss = 0.
for i in range(len(pred)):
pred_i = pred[i][-1][:self.opt.batchSize]
- new_loss = -pred_i.mean() # hinge loss
+ new_loss = -pred_i.mean() # hinge loss
g_ganloss += new_loss
g_ganloss /= len(pred)
g_featloss = 0.
for i in range(len(pred)):
-            for j in range(len(pred[i]) - 1):  # intermediate feature maps, excluding the last layer
- unweighted_loss = (pred[i][j][:self.opt.batchSize] - pred[i][j][self.opt.batchSize:]).abs().mean() # L1 loss
+            for j in range(len(pred[i]) - 1):  # intermediate feature maps, excluding the last layer
+ unweighted_loss = (
+ pred[i][j][:self.opt.batchSize] -
+ pred[i][j][self.opt.batchSize:]).abs().mean() # L1 loss
g_featloss += unweighted_loss * self.opt.lambda_feat / len(pred)
-
+
g_vggloss = self.net_vgg(self.img, self.img_f)
self.g_loss = g_ganloss + g_featloss + g_vggloss
-
+
self.g_loss.backward()
self.losses['g_ganloss'] = g_ganloss
self.losses['g_featloss'] = g_featloss
self.losses['g_vggloss'] = g_vggloss
-
def backward_D(self):
fake_data = paddle.concat((self.one_hot, self.img_f), 1)
real_data = paddle.concat((self.one_hot, self.img), 1)
fake_and_real_data = paddle.concat((fake_data, real_data), 0)
pred = self.nets['net_des'](fake_and_real_data)
-
"""content loss"""
df_ganloss = 0.
for i in range(len(pred)):
pred_i = pred[i][-1][:self.opt.batchSize]
-            new_loss = -paddle.minimum(-pred_i - 1, paddle.zeros_like(pred_i)).mean() # hinge loss
+            new_loss = -paddle.minimum(
+                -pred_i - 1, paddle.zeros_like(pred_i)).mean()  # hinge loss
df_ganloss += new_loss
df_ganloss /= len(pred)
dr_ganloss = 0.
for i in range(len(pred)):
pred_i = pred[i][-1][self.opt.batchSize:]
-            new_loss = -paddle.minimum(pred_i - 1, paddle.zeros_like(pred_i)).mean() # hinge loss
+            new_loss = -paddle.minimum(
+                pred_i - 1, paddle.zeros_like(pred_i)).mean()  # hinge loss
dr_ganloss += new_loss
dr_ganloss /= len(pred)
@@ -126,19 +126,18 @@ def backward_D(self):
self.d_loss.backward()
self.losses['df_ganloss'] = df_ganloss
self.losses['dr_ganloss'] = dr_ganloss
-
-
+
def train_iter(self, optimizers=None):
self.forward()
self.optimizers['optimG'].clear_grad()
self.backward_G()
self.optimizers['optimG'].step()
-
+
self.forward()
self.optimizers['optimD'].clear_grad()
self.backward_D()
self.optimizers['optimD'].step()
-
+
def test_iter(self, metrics=None):
self.eval()
with paddle.no_grad():
@@ -159,7 +158,6 @@ def setup_optimizers(self, lr, cfg):
lr = learning_rate * 4
else:
lr = learning_rate
- self.optimizers[opt_name] = build_optimizer(
- cfg_, lr, parameters)
+ self.optimizers[opt_name] = build_optimizer(cfg_, lr, parameters)
return self.optimizers
diff --git a/paddlers/models/ppgan/models/pix2pix_model.py b/paddlers/models/ppgan/models/pix2pix_model.py
index 2c8d5523..a816def2 100644
--- a/paddlers/models/ppgan/models/pix2pix_model.py
+++ b/paddlers/models/ppgan/models/pix2pix_model.py
@@ -31,6 +31,7 @@ class Pix2PixModel(BaseModel):
pix2pix paper: https://arxiv.org/pdf/1611.07004.pdf
"""
+
def __init__(self,
generator,
discriminator=None,
diff --git a/paddlers/models/ppgan/models/sr_model.py b/paddlers/models/ppgan/models/sr_model.py
index 767bf27e..8eebf61a 100644
--- a/paddlers/models/ppgan/models/sr_model.py
+++ b/paddlers/models/ppgan/models/sr_model.py
@@ -27,6 +27,7 @@
class BaseSRModel(BaseModel):
"""Base SR model for single image super-resolution.
"""
+
def __init__(self, generator, pixel_criterion=None, use_init_weight=False):
"""
Args:
@@ -85,8 +86,8 @@ def test_iter(self, metrics=None):
def init_sr_weight(net):
def reset_func(m):
- if hasattr(m, 'weight') and (not isinstance(
- m, (nn.BatchNorm, nn.BatchNorm2D))):
+ if hasattr(m, 'weight') and (
+ not isinstance(m, (nn.BatchNorm, nn.BatchNorm2D))):
reset_parameters(m)
net.apply(reset_func)
diff --git a/paddlers/models/ppgan/models/starganv2_model.py b/paddlers/models/ppgan/models/starganv2_model.py
index f7d5e5e6..85acf128 100644
--- a/paddlers/models/ppgan/models/starganv2_model.py
+++ b/paddlers/models/ppgan/models/starganv2_model.py
@@ -88,11 +88,12 @@ def adv_loss(logits, target):
def r1_reg(d_out, x_in):
# zero-centered gradient penalty for real images
batch_size = x_in.shape[0]
- grad_dout = paddle.grad(outputs=d_out.sum(),
- inputs=x_in,
- create_graph=True,
- retain_graph=True,
- only_inputs=True)[0]
+ grad_dout = paddle.grad(
+ outputs=d_out.sum(),
+ inputs=x_in,
+ create_graph=True,
+ retain_graph=True,
+ only_inputs=True)[0]
grad_dout2 = grad_dout.pow(2)
assert (grad_dout2.shape == x_in.shape)
reg = 0.5 * paddle.reshape(grad_dout2, (batch_size, -1)).sum(1).mean(0)
@@ -108,8 +109,8 @@ def soft_update(source, target, beta=1.0):
target_model_map = dict(target.named_parameters())
for param_name, source_param in source.named_parameters():
target_param = target_model_map[param_name]
- target_param.set_value(beta * source_param +
- (1.0 - beta) * target_param)
+ target_param.set_value(beta * source_param + (1.0 - beta) *
+ target_param)
def dump_model(model):
@@ -196,18 +197,17 @@ def he_init(module):
@MODELS.register()
class StarGANv2Model(BaseModel):
def __init__(
- self,
- generator,
- style=None,
- mapping=None,
- discriminator=None,
- fan=None,
- latent_dim=16,
- lambda_reg=1,
- lambda_sty=1,
- lambda_ds=1,
- lambda_cyc=1,
- ):
+ self,
+ generator,
+ style=None,
+ mapping=None,
+ discriminator=None,
+ fan=None,
+ latent_dim=16,
+ lambda_reg=1,
+ lambda_sty=1,
+ lambda_ds=1,
+ lambda_cyc=1, ):
super(StarGANv2Model, self).__init__()
self.w_hpf = generator['w_hpf']
self.nets_ema = {}
@@ -277,69 +277,74 @@ def train_iter(self, optimizers=None):
masks = None
# train the discriminator
- d_loss, d_losses_latent = compute_d_loss(self.nets,
- self.lambda_reg,
- x_real,
- y_org,
- y_trg,
- z_trg=z_trg,
- masks=masks)
+ d_loss, d_losses_latent = compute_d_loss(
+ self.nets,
+ self.lambda_reg,
+ x_real,
+ y_org,
+ y_trg,
+ z_trg=z_trg,
+ masks=masks)
self._reset_grad(optimizers)
d_loss.backward()
optimizers['discriminator'].minimize(d_loss)
- d_loss, d_losses_ref = compute_d_loss(self.nets,
- self.lambda_reg,
- x_real,
- y_org,
- y_trg,
- x_ref=x_ref,
- masks=masks)
+ d_loss, d_losses_ref = compute_d_loss(
+ self.nets,
+ self.lambda_reg,
+ x_real,
+ y_org,
+ y_trg,
+ x_ref=x_ref,
+ masks=masks)
self._reset_grad(optimizers)
d_loss.backward()
optimizers['discriminator'].step()
# train the generator
- g_loss, g_losses_latent = compute_g_loss(self.nets,
- self.w_hpf,
- self.lambda_sty,
- self.lambda_ds,
- self.lambda_cyc,
- x_real,
- y_org,
- y_trg,
- z_trgs=[z_trg, z_trg2],
- masks=masks)
+ g_loss, g_losses_latent = compute_g_loss(
+ self.nets,
+ self.w_hpf,
+ self.lambda_sty,
+ self.lambda_ds,
+ self.lambda_cyc,
+ x_real,
+ y_org,
+ y_trg,
+ z_trgs=[z_trg, z_trg2],
+ masks=masks)
self._reset_grad(optimizers)
g_loss.backward()
optimizers['generator'].step()
optimizers['mapping_network'].step()
optimizers['style_encoder'].step()
- g_loss, g_losses_ref = compute_g_loss(self.nets,
- self.w_hpf,
- self.lambda_sty,
- self.lambda_ds,
- self.lambda_cyc,
- x_real,
- y_org,
- y_trg,
- x_refs=[x_ref, x_ref2],
- masks=masks)
+ g_loss, g_losses_ref = compute_g_loss(
+ self.nets,
+ self.w_hpf,
+ self.lambda_sty,
+ self.lambda_ds,
+ self.lambda_cyc,
+ x_real,
+ y_org,
+ y_trg,
+ x_refs=[x_ref, x_ref2],
+ masks=masks)
self._reset_grad(optimizers)
g_loss.backward()
optimizers['generator'].step()
# compute moving average of network parameters
- soft_update(self.nets['generator'],
- self.nets_ema['generator'],
- beta=0.999)
- soft_update(self.nets['mapping_network'],
- self.nets_ema['mapping_network'],
- beta=0.999)
- soft_update(self.nets['style_encoder'],
- self.nets_ema['style_encoder'],
- beta=0.999)
+ soft_update(
+ self.nets['generator'], self.nets_ema['generator'], beta=0.999)
+ soft_update(
+ self.nets['mapping_network'],
+ self.nets_ema['mapping_network'],
+ beta=0.999)
+ soft_update(
+ self.nets['style_encoder'],
+ self.nets_ema['style_encoder'],
+ beta=0.999)
# decay weight for diversity sensitive loss
if self.lambda_ds > 0:
@@ -357,15 +362,16 @@ def test_iter(self, metrics=None):
#TODO
self.nets_ema['generator'].eval()
self.nets_ema['style_encoder'].eval()
- soft_update(self.nets['generator'],
- self.nets_ema['generator'],
- beta=0.999)
- soft_update(self.nets['mapping_network'],
- self.nets_ema['mapping_network'],
- beta=0.999)
- soft_update(self.nets['style_encoder'],
- self.nets_ema['style_encoder'],
- beta=0.999)
+ soft_update(
+ self.nets['generator'], self.nets_ema['generator'], beta=0.999)
+ soft_update(
+ self.nets['mapping_network'],
+ self.nets_ema['mapping_network'],
+ beta=0.999)
+ soft_update(
+ self.nets['style_encoder'],
+ self.nets_ema['style_encoder'],
+ beta=0.999)
src_img = self.input['src']
ref_img = self.input['ref']
ref_label = self.input['ref_cls']
diff --git a/paddlers/models/ppgan/models/styleganv2_model.py b/paddlers/models/ppgan/models/styleganv2_model.py
index 71f33b95..d7b70158 100644
--- a/paddlers/models/ppgan/models/styleganv2_model.py
+++ b/paddlers/models/ppgan/models/styleganv2_model.py
@@ -25,7 +25,6 @@
from ..solver import build_lr_scheduler, build_optimizer
-
def r1_penalty(real_pred, real_img):
"""
R1 regularization for discriminator. The core idea is to
@@ -40,27 +39,25 @@ def r1_penalty(real_pred, real_img):
Eq. 9 in "Which Training Methods for GANs Do Actually Converge?".
"""
- grad_real = paddle.grad(outputs=real_pred.sum(),
- inputs=real_img,
- create_graph=True)[0]
+ grad_real = paddle.grad(
+ outputs=real_pred.sum(), inputs=real_img, create_graph=True)[0]
grad_penalty = (grad_real * grad_real).reshape([grad_real.shape[0],
-1]).sum(1).mean()
return grad_penalty
def g_path_regularize(fake_img, latents, mean_path_length, decay=0.01):
- noise = paddle.randn(fake_img.shape) / math.sqrt(
- fake_img.shape[2] * fake_img.shape[3])
- grad = paddle.grad(outputs=(fake_img * noise).sum(),
- inputs=latents,
- create_graph=True)[0]
+ noise = paddle.randn(fake_img.shape) / math.sqrt(fake_img.shape[2] *
+ fake_img.shape[3])
+ grad = paddle.grad(
+ outputs=(fake_img * noise).sum(), inputs=latents, create_graph=True)[0]
path_lengths = paddle.sqrt((grad * grad).sum(2).mean(1))
path_mean = mean_path_length + decay * (path_lengths.mean() -
mean_path_length)
- path_penalty = ((path_lengths - path_mean) *
- (path_lengths - path_mean)).mean()
+ path_penalty = (
+ (path_lengths - path_mean) * (path_lengths - path_mean)).mean()
return path_penalty, path_lengths.detach().mean(), path_mean.detach()
@@ -72,6 +69,7 @@ class StyleGAN2Model(BaseModel):
StyleGAN2 paper: https://arxiv.org/pdf/1912.04958.pdf
"""
+
def __init__(self,
generator,
discriminator=None,
@@ -95,11 +93,11 @@ def __init__(self,
'gen_iters', 4)
self.disc_iters = 16 if self.params is None else self.params.get(
'disc_iters', 16)
- self.disc_start_iters = (0 if self.params is None else self.params.get(
- 'disc_start_iters', 0))
+ self.disc_start_iters = (0 if self.params is None else
+ self.params.get('disc_start_iters', 0))
- self.visual_iters = (500 if self.params is None else self.params.get(
- 'visual_iters', 500))
+ self.visual_iters = (500 if self.params is None else
+ self.params.get('visual_iters', 500))
self.mixing_prob = mixing_prob
self.num_style_feat = num_style_feat
@@ -174,8 +172,8 @@ def model_ema(self, decay=0.999):
net_g_ema_params = dict(neg_g_ema.named_parameters())
for k in net_g_ema_params.keys():
- net_g_ema_params[k].set_value(net_g_ema_params[k] * (decay) +
- (net_g_params[k] * (1 - decay)))
+ net_g_ema_params[k].set_value(net_g_ema_params[k] * (decay) + (
+ net_g_params[k] * (1 - decay)))
def setup_input(self, input):
"""Unpack input data from the dataloader and perform necessary pre-processing steps.
@@ -220,9 +218,9 @@ def train_iter(self, optimizers=None):
real_pred = self.nets['disc'](self.real_img)
# wgan loss with softplus (logistic loss) for discriminator
l_d_total = 0.
- l_d = self.gan_criterion(real_pred, True,
- is_disc=True) + self.gan_criterion(
- fake_pred, False, is_disc=True)
+ l_d = self.gan_criterion(
+ real_pred, True, is_disc=True) + self.gan_criterion(
+ fake_pred, False, is_disc=True)
self.losses['l_d'] = l_d
# In wgan, real_score should be positive and fake_score should be
# negative
@@ -235,8 +233,8 @@ def train_iter(self, optimizers=None):
self.real_img.stop_gradient = False
real_pred = self.nets['disc'](self.real_img)
l_d_r1 = r1_penalty(real_pred, self.real_img)
- l_d_r1 = (self.r1_reg_weight / 2 * l_d_r1 * self.disc_iters +
- 0 * real_pred[0])
+ l_d_r1 = (self.r1_reg_weight / 2 * l_d_r1 * self.disc_iters + 0 *
+ real_pred[0])
self.losses['l_d_r1'] = l_d_r1.detach().mean()
@@ -265,8 +263,8 @@ def train_iter(self, optimizers=None):
l_g_path, path_lengths, self.mean_path_length = g_path_regularize(
fake_img, latents, self.mean_path_length)
- l_g_path = (self.path_reg_weight * self.gen_iters * l_g_path +
- 0 * fake_img[0, 0, 0, 0])
+ l_g_path = (self.path_reg_weight * self.gen_iters * l_g_path + 0 *
+ fake_img[0, 0, 0, 0])
l_g_total += l_g_path
self.losses['l_g_path'] = l_g_path.detach().mean()
@@ -301,9 +299,10 @@ def set_generator(self, generator):
def forward(self, style, truncation):
truncation_latent = self.generator.get_mean_style()
- out = self.generator(styles=style,
- truncation=truncation,
- truncation_latent=truncation_latent)
+ out = self.generator(
+ styles=style,
+ truncation=truncation,
+ truncation_latent=truncation_latent)
return out[0]
def export_model(self,
@@ -316,6 +315,7 @@ def export_model(self,
truncation = paddle.rand(shape=inputs_size[1], dtype='float32')
if output_dir is None:
output_dir = 'inference_model'
- paddle.jit.save(infer_generator,
- os.path.join(output_dir, "stylegan2model_gen"),
- input_spec=[style, truncation])
+ paddle.jit.save(
+ infer_generator,
+ os.path.join(output_dir, "stylegan2model_gen"),
+ input_spec=[style, truncation])
diff --git a/paddlers/models/ppgan/models/ugatit_model.py b/paddlers/models/ppgan/models/ugatit_model.py
index 007dde19..d315a457 100644
--- a/paddlers/models/ppgan/models/ugatit_model.py
+++ b/paddlers/models/ppgan/models/ugatit_model.py
@@ -34,6 +34,7 @@ class UGATITModel(BaseModel):
UGATIT paper: https://arxiv.org/pdf/1907.10830.pdf
"""
+
def __init__(self,
generator,
discriminator_g=None,
@@ -154,6 +155,7 @@ def test_iter(self, metrics=None):
def train_iter(self, optimizers=None):
"""Calculate losses, gradients, and update network weights; called in every training iteration"""
+
def _criterion(loss_func, logit, is_real):
if is_real:
target = paddle.ones_like(logit)
@@ -177,42 +179,42 @@ def _criterion(loss_func, logit, is_real):
fake_GB_logit, fake_GB_cam_logit, _ = self.nets['disGB'](self.fake_A2B)
fake_LB_logit, fake_LB_cam_logit, _ = self.nets['disLB'](self.fake_A2B)
- D_ad_loss_GA = _criterion(self.MSE_loss,
- real_GA_logit, True) + _criterion(
- self.MSE_loss, fake_GA_logit, False)
+ D_ad_loss_GA = _criterion(self.MSE_loss, real_GA_logit,
+ True) + _criterion(self.MSE_loss,
+ fake_GA_logit, False)
D_ad_cam_loss_GA = _criterion(
self.MSE_loss, real_GA_cam_logit, True) + _criterion(
self.MSE_loss, fake_GA_cam_logit, False)
- D_ad_loss_LA = _criterion(self.MSE_loss,
- real_LA_logit, True) + _criterion(
- self.MSE_loss, fake_LA_logit, False)
+ D_ad_loss_LA = _criterion(self.MSE_loss, real_LA_logit,
+ True) + _criterion(self.MSE_loss,
+ fake_LA_logit, False)
D_ad_cam_loss_LA = _criterion(
self.MSE_loss, real_LA_cam_logit, True) + _criterion(
self.MSE_loss, fake_LA_cam_logit, False)
- D_ad_loss_GB = _criterion(self.MSE_loss,
- real_GB_logit, True) + _criterion(
- self.MSE_loss, fake_GB_logit, False)
+ D_ad_loss_GB = _criterion(self.MSE_loss, real_GB_logit,
+ True) + _criterion(self.MSE_loss,
+ fake_GB_logit, False)
D_ad_cam_loss_GB = _criterion(
self.MSE_loss, real_GB_cam_logit, True) + _criterion(
self.MSE_loss, fake_GB_cam_logit, False)
- D_ad_loss_LB = _criterion(self.MSE_loss,
- real_LB_logit, True) + _criterion(
- self.MSE_loss, fake_LB_logit, False)
+ D_ad_loss_LB = _criterion(self.MSE_loss, real_LB_logit,
+ True) + _criterion(self.MSE_loss,
+ fake_LB_logit, False)
D_ad_cam_loss_LB = _criterion(
self.MSE_loss, real_LB_cam_logit, True) + _criterion(
self.MSE_loss, fake_LB_cam_logit, False)
- D_loss_A = self.adv_weight * (D_ad_loss_GA + D_ad_cam_loss_GA +
- D_ad_loss_LA + D_ad_cam_loss_LA)
- D_loss_B = self.adv_weight * (D_ad_loss_GB + D_ad_cam_loss_GB +
- D_ad_loss_LB + D_ad_cam_loss_LB)
+ D_loss_A = self.adv_weight * (
+ D_ad_loss_GA + D_ad_cam_loss_GA + D_ad_loss_LA + D_ad_cam_loss_LA)
+ D_loss_B = self.adv_weight * (
+ D_ad_loss_GB + D_ad_cam_loss_GB + D_ad_loss_LB + D_ad_cam_loss_LB)
Discriminator_loss = D_loss_A + D_loss_B
Discriminator_loss.backward()
@@ -250,13 +252,13 @@ def _criterion(loss_func, logit, is_real):
G_identity_loss_A = self.L1_loss(fake_A2A, self.real_A)
G_identity_loss_B = self.L1_loss(fake_B2B, self.real_B)
- G_cam_loss_A = _criterion(self.BCE_loss,
- fake_B2A_cam_logit, True) + _criterion(
- self.BCE_loss, fake_A2A_cam_logit, False)
+ G_cam_loss_A = _criterion(self.BCE_loss, fake_B2A_cam_logit,
+ True) + _criterion(self.BCE_loss,
+ fake_A2A_cam_logit, False)
- G_cam_loss_B = _criterion(self.BCE_loss,
- fake_A2B_cam_logit, True) + _criterion(
- self.BCE_loss, fake_B2B_cam_logit, False)
+ G_cam_loss_B = _criterion(self.BCE_loss, fake_A2B_cam_logit,
+ True) + _criterion(self.BCE_loss,
+ fake_B2B_cam_logit, False)
G_loss_A = self.adv_weight * (
G_ad_loss_GA + G_ad_cam_loss_GA + G_ad_loss_LA + G_ad_cam_loss_LA
diff --git a/paddlers/models/ppgan/models/wav2lip_hq_model.py b/paddlers/models/ppgan/models/wav2lip_hq_model.py
index 034e81f9..23d6065b 100644
--- a/paddlers/models/ppgan/models/wav2lip_hq_model.py
+++ b/paddlers/models/ppgan/models/wav2lip_hq_model.py
@@ -37,6 +37,7 @@ class Wav2LipModelHq(BaseModel):
By default, it uses a '--netG Wav2lip' generator and
a '--netD SyncNetColor' discriminator.
"""
+
def __init__(self,
generator,
discriminator_sync=None,
@@ -61,9 +62,8 @@ def __init__(self,
self.eval_perceptual_losses = []
# define networks (both generator and discriminator)
self.nets['netG'] = build_generator(generator)
- init_weights(self.nets['netG'],
- init_type='kaiming',
- distribution='uniform')
+ init_weights(
+ self.nets['netG'], init_type='kaiming', distribution='uniform')
if self.is_train:
self.nets['netDS'] = build_discriminator(discriminator_sync)
weights_path = get_weights_path_from_url(SYNCNET_WEIGHT_URL)
@@ -71,9 +71,8 @@ def __init__(self,
self.nets['netDS'].load_dict(params)
self.nets['netDH'] = build_discriminator(discriminator_hq)
- init_weights(self.nets['netDH'],
- init_type='kaiming',
- distribution='uniform')
+ init_weights(
+ self.nets['netDH'], init_type='kaiming', distribution='uniform')
if self.is_train:
self.recon_loss = paddle.nn.L1Loss()
@@ -103,8 +102,9 @@ def backward_G(self):
self.l1_loss = self.recon_loss(self.g, self.y)
if self.disc_wt > 0.:
- if isinstance(self.nets['netDH'], paddle.DataParallel
- ): #paddle.fluid.dygraph.parallel.DataParallel)
+ if isinstance(
+ self.nets['netDH'], paddle.
+ DataParallel): #paddle.fluid.dygraph.parallel.DataParallel)
self.perceptual_loss = self.nets[
'netDH']._layers.perceptual_forward(self.g)
else:
@@ -163,10 +163,12 @@ def test_iter(self, metrics=None):
pred_real = self.nets['netDH'](self.y)
pred_fake = self.nets['netDH'](self.g)
- disc_real_loss = F.binary_cross_entropy(
- pred_real, paddle.ones((len(pred_real), 1)))
- disc_fake_loss = F.binary_cross_entropy(
- pred_fake, paddle.zeros((len(pred_fake), 1)))
+ disc_real_loss = F.binary_cross_entropy(pred_real,
+ paddle.ones(
+ (len(pred_real), 1)))
+ disc_fake_loss = F.binary_cross_entropy(pred_fake,
+ paddle.zeros(
+ (len(pred_fake), 1)))
self.eval_disc_fake_losses.append(disc_fake_loss.numpy().item())
self.eval_disc_real_losses.append(disc_real_loss.numpy().item())
@@ -178,8 +180,8 @@ def test_iter(self, metrics=None):
if isinstance(self.nets['netDH'], paddle.DataParallel
): #paddle.fluid.dygraph.parallel.DataParallel)
perceptual_loss = self.nets[
- 'netDH']._layers.perceptual_forward(
- self.g).numpy().item()
+ 'netDH']._layers.perceptual_forward(self.g).numpy(
+ ).item()
else:
perceptual_loss = self.nets['netDH'].perceptual_forward(
self.g).numpy().item()
@@ -201,11 +203,10 @@ def test_iter(self, metrics=None):
if averaged_sync_loss < .75:
self.syncnet_wt = 0.01
- print(
- 'L1: {}, Sync loss: {}, Percep: {}, Fake: {}, Real: {}'.format(
- averaged_recon_loss, averaged_sync_loss,
- averaged_perceptual_loss, averaged_disc_fake_loss,
- averaged_disc_real_loss))
+ print('L1: {}, Sync loss: {}, Percep: {}, Fake: {}, Real: {}'.
+ format(averaged_recon_loss, averaged_sync_loss,
+ averaged_perceptual_loss, averaged_disc_fake_loss,
+ averaged_disc_real_loss))
self.eval_sync_losses, self.eval_recon_losses = [], []
self.eval_disc_real_losses, self.eval_disc_fake_losses = [], []
self.eval_perceptual_losses = []
diff --git a/paddlers/models/ppgan/models/wav2lip_model.py b/paddlers/models/ppgan/models/wav2lip_model.py
index 852d25be..2a4d2b05 100644
--- a/paddlers/models/ppgan/models/wav2lip_model.py
+++ b/paddlers/models/ppgan/models/wav2lip_model.py
@@ -54,6 +54,7 @@ class Wav2LipModel(BaseModel):
By default, it uses a '--netG Wav2lip' generator and
a '--netD SyncNetColor' discriminator.
"""
+
def __init__(self,
generator,
discriminator=None,
@@ -110,8 +111,8 @@ def backward_G(self):
self.losses['sync_loss'] = self.sync_loss
self.losses['l1_loss'] = self.l1_loss
- self.loss_G = self.syncnet_wt * self.sync_loss + (
- 1 - self.syncnet_wt) * self.l1_loss
+ self.loss_G = self.syncnet_wt * self.sync_loss + (1 - self.syncnet_wt
+ ) * self.l1_loss
self.loss_G.backward()
def train_iter(self, optimizers=None):
diff --git a/paddlers/models/ppgan/modules/caffevgg.py b/paddlers/models/ppgan/modules/caffevgg.py
index b0780899..af16b738 100644
--- a/paddlers/models/ppgan/modules/caffevgg.py
+++ b/paddlers/models/ppgan/modules/caffevgg.py
@@ -15,14 +15,13 @@ class CaffeVGG19(nn.Layer):
'M', 512, 512, 512, 512, 'M'
]
- def __init__(self, output_index: int = 26) -> None:
+ def __init__(self, output_index: int=26) -> None:
super().__init__()
arch = 'caffevgg19'
weights_path = get_path_from_url(model_urls[arch][0],
model_urls[arch][1])
- data_dict: dict = np.load(weights_path,
- encoding='latin1',
- allow_pickle=True).item()
+ data_dict: dict = np.load(
+ weights_path, encoding='latin1', allow_pickle=True).item()
self.features = self.make_layers(self.cfg, data_dict)
del data_dict
self.features = nn.Sequential(*self.features.sublayers()[:output_index])
diff --git a/paddlers/models/ppgan/modules/dense_motion.py b/paddlers/models/ppgan/modules/dense_motion.py
index 131d6f4b..6217c516 100644
--- a/paddlers/models/ppgan/modules/dense_motion.py
+++ b/paddlers/models/ppgan/modules/dense_motion.py
@@ -13,6 +13,7 @@ class DenseMotionNetwork(nn.Layer):
"""
Module that predicts dense motion from the sparse motion representation given by kp_source and kp_driving.
"""
+
def __init__(self,
block_expansion,
num_blocks,
@@ -24,64 +25,71 @@ def __init__(self,
kp_variance=0.01,
mobile_net=False):
super(DenseMotionNetwork, self).__init__()
- self.hourglass = Hourglass(block_expansion=block_expansion,
- in_features=(num_kp + 1) *
- (num_channels + 1),
- max_features=max_features,
- num_blocks=num_blocks,
- mobile_net=mobile_net)
+ self.hourglass = Hourglass(
+ block_expansion=block_expansion,
+ in_features=(num_kp + 1) * (num_channels + 1),
+ max_features=max_features,
+ num_blocks=num_blocks,
+ mobile_net=mobile_net)
if mobile_net:
self.mask = nn.Sequential(
- nn.Conv2D(self.hourglass.out_filters,
- self.hourglass.out_filters,
- kernel_size=3,
- weight_attr=nn.initializer.KaimingUniform(),
- padding=1),
- nn.ReLU(),
- nn.Conv2D(self.hourglass.out_filters,
- self.hourglass.out_filters,
- kernel_size=3,
- weight_attr=nn.initializer.KaimingUniform(),
- padding=1),
- nn.ReLU(),
- nn.Conv2D(self.hourglass.out_filters,
- num_kp + 1,
- kernel_size=3,
- weight_attr=nn.initializer.KaimingUniform(),
- padding=1))
+ nn.Conv2D(
+ self.hourglass.out_filters,
+ self.hourglass.out_filters,
+ kernel_size=3,
+ weight_attr=nn.initializer.KaimingUniform(),
+ padding=1),
+ nn.ReLU(),
+ nn.Conv2D(
+ self.hourglass.out_filters,
+ self.hourglass.out_filters,
+ kernel_size=3,
+ weight_attr=nn.initializer.KaimingUniform(),
+ padding=1),
+ nn.ReLU(),
+ nn.Conv2D(
+ self.hourglass.out_filters,
+ num_kp + 1,
+ kernel_size=3,
+ weight_attr=nn.initializer.KaimingUniform(),
+ padding=1))
else:
- self.mask = nn.Conv2D(self.hourglass.out_filters,
- num_kp + 1,
- kernel_size=(7, 7),
- padding=(3, 3))
+ self.mask = nn.Conv2D(
+ self.hourglass.out_filters,
+ num_kp + 1,
+ kernel_size=(7, 7),
+ padding=(3, 3))
if estimate_occlusion_map:
if mobile_net:
- self.occlusion = nn.Sequential(
- nn.Conv2D(self.hourglass.out_filters,
- self.hourglass.out_filters,
- kernel_size=3,
- padding=1,
- weight_attr=nn.initializer.KaimingUniform()),
+ self.occlusion = nn.Sequential(
+ nn.Conv2D(
+ self.hourglass.out_filters,
+ self.hourglass.out_filters,
+ kernel_size=3,
+ padding=1,
+ weight_attr=nn.initializer.KaimingUniform()),
nn.ReLU(),
- nn.Conv2D(self.hourglass.out_filters,
- self.hourglass.out_filters,
- kernel_size=3,
- weight_attr=nn.initializer.KaimingUniform(),
- padding=1),
+ nn.Conv2D(
+ self.hourglass.out_filters,
+ self.hourglass.out_filters,
+ kernel_size=3,
+ weight_attr=nn.initializer.KaimingUniform(),
+ padding=1),
nn.ReLU(),
- nn.Conv2D(self.hourglass.out_filters,
- 1,
- kernel_size=3,
- padding=1,
- weight_attr=nn.initializer.KaimingUniform())
- )
+ nn.Conv2D(
+ self.hourglass.out_filters,
+ 1,
+ kernel_size=3,
+ padding=1,
+ weight_attr=nn.initializer.KaimingUniform()))
else:
- self.occlusion = nn.Conv2D(self.hourglass.out_filters,
- 1,
- kernel_size=(7, 7),
- padding=(3, 3))
+ self.occlusion = nn.Conv2D(
+ self.hourglass.out_filters,
+ 1,
+ kernel_size=(7, 7),
+ padding=(3, 3))
else:
self.occlusion = None
@@ -90,9 +98,8 @@ def __init__(self,
self.kp_variance = kp_variance
if self.scale_factor != 1:
- self.down = AntiAliasInterpolation2d(num_channels,
- self.scale_factor,
- mobile_net=mobile_net)
+ self.down = AntiAliasInterpolation2d(
+ num_channels, self.scale_factor, mobile_net=mobile_net)
def create_heatmap_representations(self, source_image, kp_driving,
kp_source):
@@ -100,12 +107,10 @@ def create_heatmap_representations(self, source_image, kp_driving,
Eq. 6 in the paper, H_k(z).
"""
spatial_size = source_image.shape[2:]
- gaussian_driving = kp2gaussian(kp_driving,
- spatial_size=spatial_size,
- kp_variance=self.kp_variance)
- gaussian_source = kp2gaussian(kp_source,
- spatial_size=spatial_size,
- kp_variance=self.kp_variance)
+ gaussian_driving = kp2gaussian(
+ kp_driving, spatial_size=spatial_size, kp_variance=self.kp_variance)
+ gaussian_source = kp2gaussian(
+ kp_source, spatial_size=spatial_size, kp_variance=self.kp_variance)
heatmap = gaussian_driving - gaussian_source
#adding background feature
@@ -121,8 +126,8 @@ def create_sparse_motions(self, source_image, kp_driving, kp_source):
Eq. 4 in the paper, T_{s<-d}(z).
"""
bs, _, h, w = source_image.shape
- identity_grid = make_coordinate_grid((h, w),
- type=kp_source['value'].dtype)
+ identity_grid = make_coordinate_grid(
+ (h, w), type=kp_source['value'].dtype)
identity_grid = identity_grid.reshape([1, 1, h, w, 2])
coordinate_grid = identity_grid - kp_driving['value'].reshape(
[bs, self.num_kp, 1, 1, 2])
@@ -146,8 +151,8 @@ def create_sparse_motions(self, source_image, kp_driving, kp_source):
#adding background feature
identity_grid = paddle.tile(identity_grid, (bs, 1, 1, 1, 1))
- sparse_motions = paddle.concat([identity_grid, driving_to_source],
- axis=1)
+ sparse_motions = paddle.concat(
+ [identity_grid, driving_to_source], axis=1)
return sparse_motions
def create_deformed_source_image(self, source_image, sparse_motions):
@@ -157,17 +162,18 @@ def create_deformed_source_image(self, source_image, sparse_motions):
bs, _, h, w = source_image.shape
source_repeat = paddle.tile(
source_image.unsqueeze(1).unsqueeze(1),
- [1, self.num_kp + 1, 1, 1, 1, 1
- ]) #.repeat(1, self.num_kp + 1, 1, 1, 1, 1)
+ [1, self.num_kp + 1, 1, 1, 1,
+ 1]) #.repeat(1, self.num_kp + 1, 1, 1, 1, 1)
source_repeat = source_repeat.reshape(
[bs * (self.num_kp + 1), -1, h, w])
sparse_motions = sparse_motions.reshape(
(bs * (self.num_kp + 1), h, w, -1))
- sparse_deformed = F.grid_sample(source_repeat,
- sparse_motions,
- mode='bilinear',
- padding_mode='zeros',
- align_corners=True)
+ sparse_deformed = F.grid_sample(
+ source_repeat,
+ sparse_motions,
+ mode='bilinear',
+ padding_mode='zeros',
+ align_corners=True)
sparse_deformed = sparse_deformed.reshape(
(bs, self.num_kp + 1, -1, h, w))
return sparse_deformed
@@ -183,8 +189,8 @@ def forward(self, source_image, kp_driving, kp_source):
source_image, kp_driving, kp_source)
sparse_motion = self.create_sparse_motions(source_image, kp_driving,
kp_source)
- deformed_source = self.create_deformed_source_image(
- source_image, sparse_motion)
+ deformed_source = self.create_deformed_source_image(source_image,
+ sparse_motion)
out_dict['sparse_deformed'] = deformed_source
temp = paddle.concat([heatmap_representation, deformed_source], axis=2)
diff --git a/paddlers/models/ppgan/modules/first_order.py b/paddlers/models/ppgan/modules/first_order.py
index b6b113bc..e551bc53 100644
--- a/paddlers/models/ppgan/modules/first_order.py
+++ b/paddlers/models/ppgan/modules/first_order.py
@@ -18,6 +18,7 @@ class ImagePyramide(nn.Layer):
"""
Create an image pyramid for computing the pyramid perceptual loss. See Sec. 3.3.
"""
+
def __init__(self, scales, num_channels):
super(ImagePyramide, self).__init__()
self.downs = paddle.nn.LayerList()
@@ -31,8 +32,8 @@ def __init__(self, scales, num_channels):
def forward(self, x):
out_dict = {}
for scale, down_module in zip(self.name_list, self.downs):
- out_dict['prediction_' +
- str(scale).replace('-', '.')] = down_module(x)
+ out_dict['prediction_' + str(scale).replace('-',
+ '.')] = down_module(x)
return out_dict
@@ -87,16 +88,19 @@ class ResBlock2d(nn.Layer):
"""
Res block, preserve spatial resolution.
"""
+
def __init__(self, in_features, kernel_size, padding):
super(ResBlock2d, self).__init__()
- self.conv1 = nn.Conv2D(in_channels=in_features,
- out_channels=in_features,
- kernel_size=kernel_size,
- padding=padding)
- self.conv2 = nn.Conv2D(in_channels=in_features,
- out_channels=in_features,
- kernel_size=kernel_size,
- padding=padding)
+ self.conv1 = nn.Conv2D(
+ in_channels=in_features,
+ out_channels=in_features,
+ kernel_size=kernel_size,
+ padding=padding)
+ self.conv2 = nn.Conv2D(
+ in_channels=in_features,
+ out_channels=in_features,
+ kernel_size=kernel_size,
+ padding=padding)
self.norm1 = SyncBatchNorm(in_features)
self.norm2 = SyncBatchNorm(in_features)
@@ -115,25 +119,29 @@ class MobileResBlock2d(nn.Layer):
"""
Res block, preserve spatial resolution.
"""
+
def __init__(self, in_features, kernel_size, padding):
super(MobileResBlock2d, self).__init__()
out_features = in_features * 2
- self.conv_pw = nn.Conv2D(in_channels=in_features,
- out_channels=out_features,
- kernel_size=1,
- padding=0,
- bias_attr=False)
- self.conv_dw = nn.Conv2D(in_channels=out_features,
- out_channels=out_features,
- kernel_size=kernel_size,
- padding=padding,
- groups=out_features,
- bias_attr=False)
- self.conv_pw_linear = nn.Conv2D(in_channels=out_features,
- out_channels=in_features,
- kernel_size=1,
- padding=0,
- bias_attr=False)
+ self.conv_pw = nn.Conv2D(
+ in_channels=in_features,
+ out_channels=out_features,
+ kernel_size=1,
+ padding=0,
+ bias_attr=False)
+ self.conv_dw = nn.Conv2D(
+ in_channels=out_features,
+ out_channels=out_features,
+ kernel_size=kernel_size,
+ padding=padding,
+ groups=out_features,
+ bias_attr=False)
+ self.conv_pw_linear = nn.Conv2D(
+ in_channels=out_features,
+ out_channels=in_features,
+ kernel_size=1,
+ padding=0,
+ bias_attr=False)
self.norm1 = SyncBatchNorm(in_features)
self.norm_pw = SyncBatchNorm(out_features)
self.norm_dw = SyncBatchNorm(out_features)
@@ -159,6 +167,7 @@ class UpBlock2d(nn.Layer):
"""
Upsampling block for use in decoder.
"""
+
def __init__(self,
in_features,
out_features,
@@ -167,11 +176,12 @@ def __init__(self,
groups=1):
super(UpBlock2d, self).__init__()
- self.conv = nn.Conv2D(in_channels=in_features,
- out_channels=out_features,
- kernel_size=kernel_size,
- padding=padding,
- groups=groups)
+ self.conv = nn.Conv2D(
+ in_channels=in_features,
+ out_channels=out_features,
+ kernel_size=kernel_size,
+ padding=padding,
+ groups=groups)
self.norm = SyncBatchNorm(out_features)
def forward(self, x):
@@ -186,6 +196,7 @@ class MobileUpBlock2d(nn.Layer):
"""
Upsampling block for use in decoder.
"""
+
def __init__(self,
in_features,
out_features,
@@ -194,17 +205,19 @@ def __init__(self,
groups=1):
super(MobileUpBlock2d, self).__init__()
- self.conv = nn.Conv2D(in_channels=in_features,
- out_channels=in_features,
- kernel_size=kernel_size,
- padding=padding,
- groups=in_features,
- bias_attr=False)
- self.conv1 = nn.Conv2D(in_channels=in_features,
- out_channels=out_features,
- kernel_size=1,
- padding=0,
- bias_attr=False)
+ self.conv = nn.Conv2D(
+ in_channels=in_features,
+ out_channels=in_features,
+ kernel_size=kernel_size,
+ padding=padding,
+ groups=in_features,
+ bias_attr=False)
+ self.conv1 = nn.Conv2D(
+ in_channels=in_features,
+ out_channels=out_features,
+ kernel_size=1,
+ padding=0,
+ bias_attr=False)
self.norm = SyncBatchNorm(in_features)
self.norm1 = SyncBatchNorm(out_features)
@@ -223,6 +236,7 @@ class DownBlock2d(nn.Layer):
"""
Downsampling block for use in encoder.
"""
+
def __init__(self,
in_features,
out_features,
@@ -230,11 +244,12 @@ def __init__(self,
padding=1,
groups=1):
super(DownBlock2d, self).__init__()
- self.conv = nn.Conv2D(in_channels=in_features,
- out_channels=out_features,
- kernel_size=kernel_size,
- padding=padding,
- groups=groups)
+ self.conv = nn.Conv2D(
+ in_channels=in_features,
+ out_channels=out_features,
+ kernel_size=kernel_size,
+ padding=padding,
+ groups=groups)
self.norm = SyncBatchNorm(out_features)
self.pool = nn.AvgPool2D(kernel_size=(2, 2))
@@ -250,6 +265,7 @@ class MobileDownBlock2d(nn.Layer):
"""
Downsampling block for use in encoder.
"""
+
def __init__(self,
in_features,
out_features,
@@ -257,21 +273,23 @@ def __init__(self,
padding=1,
groups=1):
super(MobileDownBlock2d, self).__init__()
- self.conv = nn.Conv2D(in_channels=in_features,
- out_channels=in_features,
- kernel_size=kernel_size,
- padding=padding,
- groups=in_features,
- bias_attr=False)
+ self.conv = nn.Conv2D(
+ in_channels=in_features,
+ out_channels=in_features,
+ kernel_size=kernel_size,
+ padding=padding,
+ groups=in_features,
+ bias_attr=False)
self.norm = SyncBatchNorm(in_features)
self.pool = nn.AvgPool2D(kernel_size=(2, 2))
- self.conv1 = nn.Conv2D(in_features,
- out_features,
- kernel_size=1,
- padding=0,
- stride=1,
- bias_attr=False)
+ self.conv1 = nn.Conv2D(
+ in_features,
+ out_features,
+ kernel_size=1,
+ padding=0,
+ stride=1,
+ bias_attr=False)
self.norm1 = SyncBatchNorm(out_features)
def forward(self, x):
@@ -289,6 +307,7 @@ class SameBlock2d(nn.Layer):
"""
Simple block, preserve spatial resolution.
"""
+
def __init__(self,
in_features,
out_features,
@@ -297,13 +316,14 @@ def __init__(self,
padding=1,
mobile_net=False):
super(SameBlock2d, self).__init__()
- self.conv = nn.Conv2D(in_channels=in_features,
- out_channels=out_features,
- kernel_size=kernel_size,
- padding=padding,
- groups=groups,
- bias_attr=(mobile_net == False),
- weight_attr=nn.initializer.KaimingUniform())
+ self.conv = nn.Conv2D(
+ in_channels=in_features,
+ out_channels=out_features,
+ kernel_size=kernel_size,
+ padding=padding,
+ groups=groups,
+            bias_attr=not mobile_net,
+ weight_attr=nn.initializer.KaimingUniform())
self.norm = SyncBatchNorm(out_features)
def forward(self, x):
@@ -317,6 +337,7 @@ class Encoder(nn.Layer):
"""
Hourglass Encoder
"""
+
def __init__(self,
block_expansion,
in_features,
@@ -329,20 +350,20 @@ def __init__(self,
for i in range(num_blocks):
if mobile_net:
down_blocks.append(
- MobileDownBlock2d(in_features if i == 0 else min(
- max_features, block_expansion * (2**i)),
- min(max_features,
- block_expansion * (2**(i + 1))),
- kernel_size=3,
- padding=1))
+ MobileDownBlock2d(
+ in_features if i == 0 else min(
+ max_features, block_expansion * (2**i)),
+ min(max_features, block_expansion * (2**(i + 1))),
+ kernel_size=3,
+ padding=1))
else:
down_blocks.append(
- DownBlock2d(in_features if i == 0 else min(
- max_features, block_expansion * (2**i)),
- min(max_features,
- block_expansion * (2**(i + 1))),
- kernel_size=3,
- padding=1))
+ DownBlock2d(
+ in_features if i == 0 else min(
+ max_features, block_expansion * (2**i)),
+ min(max_features, block_expansion * (2**(i + 1))),
+ kernel_size=3,
+ padding=1))
self.down_blocks = nn.LayerList(down_blocks)
def forward(self, x):
@@ -356,6 +377,7 @@ class Decoder(nn.Layer):
"""
Hourglass Decoder
"""
+
def __init__(self,
block_expansion,
in_features,
@@ -372,16 +394,14 @@ def __init__(self,
in_filters = (1 if i == num_blocks - 1 else 2) * min(
max_features, block_expansion * (2**(i + 1)))
up_blocks.append(
- MobileUpBlock2d(in_filters,
- out_filters,
- kernel_size=3,
- padding=1))
+ MobileUpBlock2d(
+ in_filters, out_filters, kernel_size=3, padding=1))
else:
in_filters = (1 if i == num_blocks - 1 else 2) * min(
max_features, block_expansion * (2**(i + 1)))
up_blocks.append(
- UpBlock2d(in_filters, out_filters, kernel_size=3,
- padding=1))
+ UpBlock2d(
+ in_filters, out_filters, kernel_size=3, padding=1))
self.up_blocks = nn.LayerList(up_blocks)
self.out_filters = block_expansion + in_features
@@ -399,6 +419,7 @@ class Hourglass(nn.Layer):
"""
Hourglass architecture.
"""
+
def __init__(self,
block_expansion,
in_features,
@@ -406,16 +427,18 @@ def __init__(self,
max_features=256,
mobile_net=False):
super(Hourglass, self).__init__()
- self.encoder = Encoder(block_expansion,
- in_features,
- num_blocks,
- max_features,
- mobile_net=mobile_net)
- self.decoder = Decoder(block_expansion,
- in_features,
- num_blocks,
- max_features,
- mobile_net=mobile_net)
+ self.encoder = Encoder(
+ block_expansion,
+ in_features,
+ num_blocks,
+ max_features,
+ mobile_net=mobile_net)
+ self.decoder = Decoder(
+ block_expansion,
+ in_features,
+ num_blocks,
+ max_features,
+ mobile_net=mobile_net)
self.out_filters = self.decoder.out_filters
def forward(self, x):
@@ -426,6 +449,7 @@ class AntiAliasInterpolation2d(nn.Layer):
"""
Band-limited downsampling, for better preservation of the input signal.
"""
+
def __init__(self, channels, scale, mobile_net=False):
super(AntiAliasInterpolation2d, self).__init__()
if mobile_net:
@@ -443,7 +467,8 @@ def __init__(self, channels, scale, mobile_net=False):
# gaussian function of each dimension.
kernel = 1
meshgrids = paddle.meshgrid(
- [paddle.arange(size, dtype='float32') for size in kernel_size])
+ [paddle.arange(
+ size, dtype='float32') for size in kernel_size])
for size, std, mgrid in zip(kernel_size, sigma, meshgrids):
mean = (size - 1) / 2
kernel *= paddle.exp(-(mgrid - mean)**2 / (2 * std**2 + 1e-9))
@@ -452,7 +477,7 @@ def __init__(self, channels, scale, mobile_net=False):
kernel = kernel / paddle.sum(kernel)
# Reshape to depthwise convolutional weight
kernel = kernel.reshape([1, 1, *kernel.shape])
- kernel = paddle.tile(kernel, [channels, *[1] * (kernel.dim() - 1)])
+ kernel = paddle.tile(kernel, [channels, * [1] * (kernel.dim() - 1)])
self.register_buffer('weight', kernel)
self.groups = channels
diff --git a/paddlers/models/ppgan/modules/init.py b/paddlers/models/ppgan/modules/init.py
index 12784ce1..47fa4065 100644
--- a/paddlers/models/ppgan/modules/init.py
+++ b/paddlers/models/ppgan/modules/init.py
@@ -74,8 +74,8 @@ def calculate_gain(nonlinearity, param=None):
# True/False are instances of int, hence check above
negative_slope = param
else:
- raise ValueError(
- "negative_slope {} not a valid number".format(param))
+ raise ValueError("negative_slope {} not a valid number".format(
+ param))
return math.sqrt(2.0 / (1 + negative_slope**2))
else:
raise ValueError("Unsupported nonlinearity {}".format(nonlinearity))
@@ -259,19 +259,15 @@ def kaiming_init(layer,
distribution='normal'):
assert distribution in ['uniform', 'normal']
if distribution == 'uniform':
- kaiming_uniform_(layer.weight,
- a=a,
- mode=mode,
- nonlinearity=nonlinearity)
+ kaiming_uniform_(
+ layer.weight, a=a, mode=mode, nonlinearity=nonlinearity)
else:
kaiming_normal_(layer.weight, a=a, mode=mode, nonlinearity=nonlinearity)
if hasattr(layer, 'bias') and layer.bias is not None:
constant_(layer.bias, bias)
-def init_weights(net,
- init_type='normal',
- init_gain=0.02,
+def init_weights(net, init_type='normal', init_gain=0.02,
distribution='normal'):
"""Initialize network weights.
Args:
@@ -281,10 +277,11 @@ def init_weights(net,
We use 'normal' in the original pix2pix and CycleGAN paper. But xavier and kaiming might
work better for some applications. Feel free to try yourself.
"""
+
def init_func(m): # define the initialization function
classname = m.__class__.__name__
- if hasattr(m, 'weight') and (classname.find('Conv') != -1
- or classname.find('Linear') != -1):
+ if hasattr(m, 'weight') and (classname.find('Conv') != -1 or
+ classname.find('Linear') != -1):
if init_type == 'normal':
normal_(m.weight, 0.0, init_gain)
elif init_type == 'xavier':
diff --git a/paddlers/models/ppgan/modules/keypoint_detector.py b/paddlers/models/ppgan/modules/keypoint_detector.py
index 809e64d4..f609633d 100644
--- a/paddlers/models/ppgan/modules/keypoint_detector.py
+++ b/paddlers/models/ppgan/modules/keypoint_detector.py
@@ -12,6 +12,7 @@ class KPDetector(nn.Layer):
"""
Detect keypoints. Return the keypoint positions and the Jacobian near each keypoint.
"""
+
def __init__(self,
block_expansion,
num_kp,
@@ -26,79 +27,90 @@ def __init__(self,
mobile_net=False):
super(KPDetector, self).__init__()
- self.predictor = Hourglass(block_expansion,
- in_features=num_channels,
- max_features=max_features,
- num_blocks=num_blocks,
- mobile_net=mobile_net)
+ self.predictor = Hourglass(
+ block_expansion,
+ in_features=num_channels,
+ max_features=max_features,
+ num_blocks=num_blocks,
+ mobile_net=mobile_net)
if mobile_net:
self.kp = nn.Sequential(
- nn.Conv2D(in_channels=self.predictor.out_filters,
- out_channels=self.predictor.out_filters,
- kernel_size=3,
- weight_attr=nn.initializer.KaimingUniform(),
- padding=pad),
- nn.Conv2D(in_channels=self.predictor.out_filters,
- out_channels=self.predictor.out_filters,
- kernel_size=3,
- weight_attr=nn.initializer.KaimingUniform(),
- padding=pad),
- nn.Conv2D(in_channels=self.predictor.out_filters,
- out_channels=num_kp,
- kernel_size=3,
- weight_attr=nn.initializer.KaimingUniform(),
- padding=pad))
+ nn.Conv2D(
+ in_channels=self.predictor.out_filters,
+ out_channels=self.predictor.out_filters,
+ kernel_size=3,
+ weight_attr=nn.initializer.KaimingUniform(),
+ padding=pad),
+ nn.Conv2D(
+ in_channels=self.predictor.out_filters,
+ out_channels=self.predictor.out_filters,
+ kernel_size=3,
+ weight_attr=nn.initializer.KaimingUniform(),
+ padding=pad),
+ nn.Conv2D(
+ in_channels=self.predictor.out_filters,
+ out_channels=num_kp,
+ kernel_size=3,
+ weight_attr=nn.initializer.KaimingUniform(),
+ padding=pad))
else:
- self.kp = nn.Conv2D(in_channels=self.predictor.out_filters,
- out_channels=num_kp,
- kernel_size=(7, 7),
- padding=pad)
+ self.kp = nn.Conv2D(
+ in_channels=self.predictor.out_filters,
+ out_channels=num_kp,
+ kernel_size=(7, 7),
+ padding=pad)
if estimate_jacobian:
self.num_jacobian_maps = 1 if single_jacobian_map else num_kp
if mobile_net:
self.jacobian = nn.Sequential(
- nn.Conv2D(in_channels=self.predictor.out_filters,
+ nn.Conv2D(
+ in_channels=self.predictor.out_filters,
out_channels=self.predictor.out_filters,
kernel_size=3,
padding=pad),
- nn.Conv2D(in_channels=self.predictor.out_filters,
- out_channels=self.predictor.out_filters,
- kernel_size=3,
- padding=pad),
- nn.Conv2D(in_channels=self.predictor.out_filters,
- out_channels=4 * self.num_jacobian_maps,
- kernel_size=3,
- padding=pad),
- )
+ nn.Conv2D(
+ in_channels=self.predictor.out_filters,
+ out_channels=self.predictor.out_filters,
+ kernel_size=3,
+ padding=pad),
+ nn.Conv2D(
+ in_channels=self.predictor.out_filters,
+ out_channels=4 * self.num_jacobian_maps,
+ kernel_size=3,
+ padding=pad), )
self.jacobian[0].weight.set_value(
- paddle.zeros(self.jacobian[0].weight.shape, dtype='float32'))
+ paddle.zeros(
+ self.jacobian[0].weight.shape, dtype='float32'))
self.jacobian[1].weight.set_value(
- paddle.zeros(self.jacobian[1].weight.shape, dtype='float32'))
+ paddle.zeros(
+ self.jacobian[1].weight.shape, dtype='float32'))
self.jacobian[2].weight.set_value(
- paddle.zeros(self.jacobian[2].weight.shape, dtype='float32'))
+ paddle.zeros(
+ self.jacobian[2].weight.shape, dtype='float32'))
self.jacobian[2].bias.set_value(
- paddle.to_tensor([1, 0, 0, 1] *
- self.num_jacobian_maps).astype('float32'))
+ paddle.to_tensor([1, 0, 0, 1] * self.num_jacobian_maps)
+ .astype('float32'))
else:
- self.jacobian = nn.Conv2D(in_channels=self.predictor.out_filters,
- out_channels=4 * self.num_jacobian_maps,
- kernel_size=(7, 7),
- padding=pad)
+ self.jacobian = nn.Conv2D(
+ in_channels=self.predictor.out_filters,
+ out_channels=4 * self.num_jacobian_maps,
+ kernel_size=(7, 7),
+ padding=pad)
self.jacobian.weight.set_value(
- paddle.zeros(self.jacobian.weight.shape, dtype='float32'))
+ paddle.zeros(
+ self.jacobian.weight.shape, dtype='float32'))
self.jacobian.bias.set_value(
- paddle.to_tensor([1, 0, 0, 1] *
- self.num_jacobian_maps).astype('float32'))
+ paddle.to_tensor([1, 0, 0, 1] * self.num_jacobian_maps)
+ .astype('float32'))
else:
self.jacobian = None
self.temperature = temperature
self.scale_factor = scale_factor
if self.scale_factor != 1:
- self.down = AntiAliasInterpolation2d(num_channels,
- self.scale_factor,
- mobile_net=mobile_net)
+ self.down = AntiAliasInterpolation2d(
+ num_channels, self.scale_factor, mobile_net=mobile_net)
def gaussian2kp(self, heatmap):
"""
diff --git a/paddlers/models/ppgan/modules/norm.py b/paddlers/models/ppgan/modules/norm.py
index 2cc3e994..f584ac23 100644
--- a/paddlers/models/ppgan/modules/norm.py
+++ b/paddlers/models/ppgan/modules/norm.py
@@ -47,9 +47,10 @@ def build_norm_layer(norm_type='instance'):
initializer=nn.initializer.Constant(1.0),
learning_rate=0.0,
trainable=False),
- bias_attr=paddle.ParamAttr(initializer=nn.initializer.Constant(0.0),
- learning_rate=0.0,
- trainable=False))
+ bias_attr=paddle.ParamAttr(
+ initializer=nn.initializer.Constant(0.0),
+ learning_rate=0.0,
+ trainable=False))
elif norm_type == 'spectral':
norm_layer = functools.partial(Spectralnorm)
elif norm_type == 'none':
diff --git a/paddlers/models/ppgan/modules/wing.py b/paddlers/models/ppgan/modules/wing.py
index 4cdc1826..a4513e84 100644
--- a/paddlers/models/ppgan/modules/wing.py
+++ b/paddlers/models/ppgan/modules/wing.py
@@ -23,16 +23,17 @@ def __init__(self, num_modules, depth, num_features, first_one=False):
self.num_modules = num_modules
self.depth = depth
self.features = num_features
- self.coordconv = CoordConvTh(64,
- 64,
- True,
- True,
- 256,
- first_one,
- out_channels=256,
- kernel_size=1,
- stride=1,
- padding=0)
+ self.coordconv = CoordConvTh(
+ 64,
+ 64,
+ True,
+ True,
+ 256,
+ first_one,
+ out_channels=256,
+ kernel_size=1,
+ stride=1,
+ padding=0)
self._generate_network(self.depth)
def _generate_network(self, level):
@@ -79,13 +80,13 @@ def __init__(self, height=64, width=64, with_r=False, with_boundary=False):
(height, width)).astype('float32')
x_coords = (x_coords / (height - 1)) * 2 - 1
y_coords = (y_coords / (width - 1)) * 2 - 1
- coords = paddle.stack([x_coords, y_coords],
- axis=0) # (2, height, width)
+ coords = paddle.stack(
+ [x_coords, y_coords], axis=0) # (2, height, width)
if self.with_r:
rr = paddle.sqrt(
- paddle.pow(x_coords, 2) +
- paddle.pow(y_coords, 2)) # (height, width)
+ paddle.pow(x_coords, 2) + paddle.pow(y_coords,
+ 2)) # (height, width)
rr = (rr / paddle.max(rr)).unsqueeze(0)
coords = paddle.concat([coords, rr], axis=0)
@@ -115,6 +116,7 @@ def forward(self, x, heatmap=None):
class CoordConvTh(nn.Layer):
"""CoordConv layer as in the paper."""
+
def __init__(self,
height,
width,
@@ -144,12 +146,13 @@ class ConvBlock(nn.Layer):
def __init__(self, in_planes, out_planes):
super(ConvBlock, self).__init__()
self.bn1 = nn.BatchNorm2D(in_planes)
- conv3x3 = partial(nn.Conv2D,
- kernel_size=3,
- stride=1,
- padding=1,
- bias_attr=False,
- dilation=1)
+ conv3x3 = partial(
+ nn.Conv2D,
+ kernel_size=3,
+ stride=1,
+ padding=1,
+ bias_attr=False,
+ dilation=1)
self.conv1 = conv3x3(in_planes, int(out_planes / 2))
self.bn2 = nn.BatchNorm2D(int(out_planes / 2))
self.conv2 = conv3x3(int(out_planes / 2), int(out_planes / 4))
@@ -159,8 +162,10 @@ def __init__(self, in_planes, out_planes):
self.downsample = None
if in_planes != out_planes:
self.downsample = nn.Sequential(
- nn.BatchNorm2D(in_planes), nn.ReLU(True),
- nn.Conv2D(in_planes, out_planes, 1, 1, bias_attr=False))
+ nn.BatchNorm2D(in_planes),
+ nn.ReLU(True),
+ nn.Conv2D(
+ in_planes, out_planes, 1, 1, bias_attr=False))
def forward(self, x):
residual = x
@@ -235,16 +240,17 @@ def shift(x, N):
IDXPAIR = namedtuple('IDXPAIR', 'start end')
-index_map = Munch(chin=IDXPAIR(0 + 8, 33 - 8),
- eyebrows=IDXPAIR(33, 51),
- eyebrowsedges=IDXPAIR(33, 46),
- nose=IDXPAIR(51, 55),
- nostrils=IDXPAIR(55, 60),
- eyes=IDXPAIR(60, 76),
- lipedges=IDXPAIR(76, 82),
- lipupper=IDXPAIR(77, 82),
- liplower=IDXPAIR(83, 88),
- lipinner=IDXPAIR(88, 96))
+index_map = Munch(
+ chin=IDXPAIR(0 + 8, 33 - 8),
+ eyebrows=IDXPAIR(33, 51),
+ eyebrowsedges=IDXPAIR(33, 46),
+ nose=IDXPAIR(51, 55),
+ nostrils=IDXPAIR(55, 60),
+ eyes=IDXPAIR(60, 76),
+ lipedges=IDXPAIR(76, 82),
+ lipupper=IDXPAIR(77, 82),
+ liplower=IDXPAIR(83, 88),
+ lipinner=IDXPAIR(88, 96))
OPPAIR = namedtuple('OPPAIR', 'shift resize')
@@ -255,12 +261,13 @@ def preprocess(x):
x = normalize(x)
sw = H // 256
- operations = Munch(chin=OPPAIR(0, 3),
- eyebrows=OPPAIR(-7 * sw, 2),
- nostrils=OPPAIR(8 * sw, 4),
- lipupper=OPPAIR(-8 * sw, 4),
- liplower=OPPAIR(8 * sw, 4),
- lipinner=OPPAIR(-2 * sw, 3))
+ operations = Munch(
+ chin=OPPAIR(0, 3),
+ eyebrows=OPPAIR(-7 * sw, 2),
+ nostrils=OPPAIR(8 * sw, 4),
+ lipupper=OPPAIR(-8 * sw, 4),
+ liplower=OPPAIR(8 * sw, 4),
+ lipinner=OPPAIR(-2 * sw, 3))
for part, ops in operations.items():
start, end = index_map[part]
@@ -268,8 +275,7 @@ def preprocess(x):
zero_out = paddle.concat([
paddle.arange(0, index_map.chin.start),
- paddle.arange(index_map.chin.end, 33),
- paddle.to_tensor([
+ paddle.arange(index_map.chin.end, 33), paddle.to_tensor([
index_map.eyebrowsedges.start, index_map.eyebrowsedges.end,
index_map.lipedges.start, index_map.lipedges.end
])
@@ -291,10 +297,10 @@ def preprocess(x):
# Second-level mask
x2 = deepcopy(x)
x2[:, index_map.chin.start:index_map.chin.end] = 0 # start:end was 0:33
- x2[:, index_map.lipedges.start:index_map.lipinner.
- end] = 0 # start:end was 76:96
- x2[:, index_map.eyebrows.start:index_map.eyebrows.
- end] = 0 # start:end was 33:51
+ x2[:, index_map.lipedges.start:
+ index_map.lipinner.end] = 0 # start:end was 76:96
+ x2[:, index_map.eyebrows.start:
+ index_map.eyebrows.end] = 0 # start:end was 33:51
x = paddle.sum(x, axis=1, keepdim=True) # (N, 1, H, W)
x2 = paddle.sum(x2, axis=1, keepdim=True) # mask without faceline and mouth
diff --git a/paddlers/models/ppgan/solver/lr_scheduler.py b/paddlers/models/ppgan/solver/lr_scheduler.py
index ca68528a..41701a50 100644
--- a/paddlers/models/ppgan/solver/lr_scheduler.py
+++ b/paddlers/models/ppgan/solver/lr_scheduler.py
@@ -38,8 +38,8 @@ def __init__(self, learning_rate, start_epoch, decay_epochs,
iters_per_epoch):
def lambda_rule(epoch):
epoch = epoch // iters_per_epoch
- lr_l = 1.0 - max(0,
- epoch + 1 - start_epoch) / float(decay_epochs + 1)
+ lr_l = 1.0 - max(0, epoch + 1 -
+ start_epoch) / float(decay_epochs + 1)
return lr_l
super().__init__(learning_rate, lambda_rule)
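The reflowed lambda_rule above implements a linear decay; as a sanity check, a standalone sketch with illustrative hyperparameter values:

# Standalone copy of the decay rule above; start_epoch and decay_epochs
# are illustrative values, not defaults from the scheduler.
def lr_multiplier(epoch, start_epoch=100, decay_epochs=100):
    return 1.0 - max(0, epoch + 1 - start_epoch) / float(decay_epochs + 1)

print(lr_multiplier(0))    # 1.0 (before decay starts)
print(lr_multiplier(149))  # ~0.505 (halfway through the decay window)
print(lr_multiplier(199))  # ~0.0099 (end of the decay window)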
@@ -66,6 +66,7 @@ class CosineAnnealingRestartLR(LRScheduler):
eta_min (float): The minimum learning rate of the cosine annealing cycle. Default: 0.
last_epoch (int): Used in paddle.nn._LRScheduler. Default: -1.
"""
+
def __init__(self,
learning_rate,
periods,
diff --git a/paddlers/models/ppgan/utils/audio.py b/paddlers/models/ppgan/utils/audio.py
index 432305bd..ef4e07d1 100644
--- a/paddlers/models/ppgan/utils/audio.py
+++ b/paddlers/models/ppgan/utils/audio.py
@@ -81,10 +81,11 @@ def _stft(y):
return _lws_processor(audio_config).stft(y).T
else:
librosa = try_import('librosa')
- return librosa.stft(y=y,
- n_fft=audio_config.n_fft,
- hop_length=get_hop_size(),
- win_length=audio_config.win_size)
+ return librosa.stft(
+ y=y,
+ n_fft=audio_config.n_fft,
+ hop_length=get_hop_size(),
+ win_length=audio_config.win_size)
##########################################################
@@ -149,16 +150,15 @@ def _db_to_amp(x):
def _normalize(S):
if audio_config.allow_clipping_in_normalization:
if audio_config.symmetric_mels:
- return np.clip(
- (2 * audio_config.max_abs_value) *
- ((S - audio_config.min_level_db) /
- (-audio_config.min_level_db)) - audio_config.max_abs_value,
- -audio_config.max_abs_value, audio_config.max_abs_value)
+ return np.clip((2 * audio_config.max_abs_value) * (
+ (S - audio_config.min_level_db) /
+ (-audio_config.min_level_db)) - audio_config.max_abs_value,
+ -audio_config.max_abs_value,
+ audio_config.max_abs_value)
else:
- return np.clip(
- audio_config.max_abs_value * ((S - audio_config.min_level_db) /
- (-audio_config.min_level_db)), 0,
- audio_config.max_abs_value)
+ return np.clip(audio_config.max_abs_value * (
+ (S - audio_config.min_level_db) / (-audio_config.min_level_db)),
+ 0, audio_config.max_abs_value)
assert S.max() <= 0 and S.min() - audio_config.min_level_db >= 0
if audio_config.symmetric_mels:
@@ -166,18 +166,18 @@ def _normalize(S):
(S - audio_config.min_level_db) /
(-audio_config.min_level_db)) - audio_config.max_abs_value
else:
- return audio_config.max_abs_value * ((S - audio_config.min_level_db) /
- (-audio_config.min_level_db))
+ return audio_config.max_abs_value * (
+ (S - audio_config.min_level_db) / (-audio_config.min_level_db))
def _denormalize(D):
if audio_config.allow_clipping_in_normalization:
if audio_config.symmetric_mels:
- return (((np.clip(D, -audio_config.max_abs_value,
- audio_config.max_abs_value) +
- audio_config.max_abs_value) * -audio_config.min_level_db /
- (2 * audio_config.max_abs_value)) +
- audio_config.min_level_db)
+ return (
+ ((np.clip(D, -audio_config.max_abs_value,
+ audio_config.max_abs_value) +
+ audio_config.max_abs_value) * -audio_config.min_level_db /
+ (2 * audio_config.max_abs_value)) + audio_config.min_level_db)
else:
return ((np.clip(D, 0, audio_config.max_abs_value) *
-audio_config.min_level_db / audio_config.max_abs_value) +
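The _normalize/_denormalize pair being reflowed above are inverses of each other; a small numpy sketch of the symmetric branch, with illustrative values standing in for audio_config:

import numpy as np

max_abs_value, min_level_db = 4.0, -100.0  # illustrative config values

def normalize(S):
    # Map S in [min_level_db, 0] dB to [-max_abs_value, max_abs_value].
    return (2 * max_abs_value) * (
        (S - min_level_db) / (-min_level_db)) - max_abs_value

def denormalize(D):
    return ((D + max_abs_value) * -min_level_db /
            (2 * max_abs_value)) + min_level_db

S = np.array([-100.0, -50.0, 0.0])
print(normalize(S))               # [-4.  0.  4.]
print(denormalize(normalize(S)))  # [-100.  -50.    0.]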
diff --git a/paddlers/models/ppgan/utils/filesystem.py b/paddlers/models/ppgan/utils/filesystem.py
index 9b0ce88b..b32184f9 100644
--- a/paddlers/models/ppgan/utils/filesystem.py
+++ b/paddlers/models/ppgan/utils/filesystem.py
@@ -32,9 +32,7 @@ def convert(state_dict):
model_dict = {}
for k, v in state_dict.items():
- if isinstance(
- v,
- (paddle.static.Variable, paddle.Tensor)):
+ if isinstance(v, (paddle.static.Variable, paddle.Tensor)):
model_dict[k] = v.numpy()
else:
model_dict[k] = v
@@ -43,9 +41,7 @@ def convert(state_dict):
final_dict = {}
for k, v in state_dicts.items():
- if isinstance(
- v,
- (paddle.static.Variable, paddle.Tensor)):
+ if isinstance(v, (paddle.static.Variable, paddle.Tensor)):
final_dict = convert(state_dicts)
break
elif isinstance(v, dict):
diff --git a/paddlers/models/ppgan/utils/image_pool.py b/paddlers/models/ppgan/utils/image_pool.py
index e76dcead..956b32a3 100644
--- a/paddlers/models/ppgan/utils/image_pool.py
+++ b/paddlers/models/ppgan/utils/image_pool.py
@@ -25,6 +25,7 @@ class ImagePool():
Args:
pool_size (int) -- the size of the image buffer; if pool_size=0, no buffer will be created
"""
+
def __init__(self, pool_size, prob=0.5):
self.pool_size = pool_size
self.prob = prob
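For readers unfamiliar with ImagePool: it keeps a buffer of previously generated images and, with probability prob, answers a query with a stored image while stashing the new one, a common trick for stabilizing GAN discriminators. A simplified standalone sketch of that assumed behavior (not the class's exact code):

import random

class SimpleImagePool:
    """Simplified history buffer; semantics assumed from the docstring."""

    def __init__(self, pool_size, prob=0.5):
        self.pool_size = pool_size
        self.prob = prob
        self.images = []

    def query(self, image):
        if self.pool_size == 0:  # buffering disabled
            return image
        if len(self.images) < self.pool_size:  # fill the buffer first
            self.images.append(image)
            return image
        if random.random() < self.prob:  # return an old image, keep the new
            idx = random.randrange(self.pool_size)
            old, self.images[idx] = self.images[idx], image
            return old
        return image

pool = SimpleImagePool(pool_size=2)
print([pool.query(im) for im in ["a", "b", "c", "d"]])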
diff --git a/paddlers/models/ppgan/utils/photopen.py b/paddlers/models/ppgan/utils/photopen.py
index a8fdf263..82b6e657 100644
--- a/paddlers/models/ppgan/utils/photopen.py
+++ b/paddlers/models/ppgan/utils/photopen.py
@@ -20,15 +20,22 @@
import functools
from paddle.nn import Conv1DTranspose, Conv2DTranspose, Conv3DTranspose, Linear
+
# Process image data: crop, horizontally flip, reshape, and normalize
-def data_transform(img, resize_w, resize_h, load_size=286, pos=[0, 0, 256, 256], flip=True, is_image=True):
+def data_transform(img,
+ resize_w,
+ resize_h,
+ load_size=286,
+ pos=[0, 0, 256, 256],
+ flip=True,
+ is_image=True):
if is_image:
resized = img.resize((resize_w, resize_h), Image.BICUBIC)
else:
resized = img.resize((resize_w, resize_h), Image.NEAREST)
croped = resized.crop((pos[0], pos[1], pos[2], pos[3]))
fliped = ImageOps.mirror(croped) if flip else croped
- fliped = np.array(fliped) # transform to numpy array
+ fliped = np.array(fliped) # transform to numpy array
expanded = np.expand_dims(fliped, 2) if len(fliped.shape) < 3 else fliped
transposed = np.transpose(expanded, (2, 0, 1)).astype('float32')
if is_image:
@@ -37,34 +44,55 @@ def data_transform(img, resize_w, resize_h, load_size=286, pos=[0, 0, 256, 256],
normalized = transposed
return normalized
+
# Define the COCO dataset class
class COCODateset(Dataset):
def __init__(self, opt):
super(COCODateset, self).__init__()
- inst_dir = opt.dataroot+'train_inst/'
+ inst_dir = opt.dataroot + 'train_inst/'
_, _, inst_list = next(os.walk(inst_dir))
self.inst_list = np.sort(inst_list)
self.opt = opt
def __getitem__(self, idx):
- ins = Image.open(self.opt.dataroot+'train_inst/'+self.inst_list[idx])
- img = Image.open(self.opt.dataroot+'train_img/'+self.inst_list[idx].replace(".png", ".jpg"))
+ ins = Image.open(self.opt.dataroot + 'train_inst/' + self.inst_list[
+ idx])
+ img = Image.open(self.opt.dataroot + 'train_img/' + self.inst_list[idx]
+ .replace(".png", ".jpg"))
img = img.convert('RGB')
w, h = img.size
resize_w, resize_h = 0, 0
if w < h:
- resize_w, resize_h = self.opt.load_size, int(h * self.opt.load_size / w)
+ resize_w, resize_h = self.opt.load_size, int(h *
+ self.opt.load_size / w)
else:
- resize_w, resize_h = int(w * self.opt.load_size / h), self.opt.load_size
+ resize_w, resize_h = int(w * self.opt.load_size /
+ h), self.opt.load_size
left = random.randint(0, resize_w - self.opt.crop_size)
top = random.randint(0, resize_h - self.opt.crop_size)
flip = False
-
- img = data_transform(img, resize_w, resize_h, load_size=opt.load_size,
- pos=[left, top, left + self.opt.crop_size, top + self.opt.crop_size], flip=flip, is_image=True)
- ins = data_transform(ins, resize_w, resize_h, load_size=opt.load_size,
- pos=[left, top, left + self.opt.crop_size, top + self.opt.crop_size], flip=flip, is_image=False)
+
+ img = data_transform(
+ img,
+ resize_w,
+ resize_h,
+ load_size=self.opt.load_size,
+ pos=[
+ left, top, left + self.opt.crop_size, top + self.opt.crop_size
+ ],
+ flip=flip,
+ is_image=True)
+ ins = data_transform(
+ ins,
+ resize_w,
+ resize_h,
+ load_size=self.opt.load_size,
+ pos=[
+ left, top, left + self.opt.crop_size, top + self.opt.crop_size
+ ],
+ flip=flip,
+ is_image=False)
return img, ins, self.inst_list[idx]
@@ -85,14 +113,17 @@ def data_onehot_pro(instance, opt):
edge = np.zeros(instance.shape, 'int64')
t = instance.numpy()
edge[:, :, :, 1:] = edge[:, :, :, 1:] | (t[:, :, :, 1:] != t[:, :, :, :-1])
- edge[:, :, :, :-1] = edge[:, :, :, :-1] | (t[:, :, :, 1:] != t[:, :, :, :-1])
+ edge[:, :, :, :-1] = edge[:, :, :, :-1] | (
+ t[:, :, :, 1:] != t[:, :, :, :-1])
edge[:, :, 1:, :] = edge[:, :, 1:, :] | (t[:, :, 1:, :] != t[:, :, :-1, :])
- edge[:, :, :-1, :] = edge[:, :, :-1, :] | (t[:, :, 1:, :] != t[:, :, :-1, :])
+ edge[:, :, :-1, :] = edge[:, :, :-1, :] | (
+ t[:, :, 1:, :] != t[:, :, :-1, :])
edge = paddle.to_tensor(edge).astype('float32')
semantics = paddle.concat([semantics, edge], 1)
return semantics
+
# Set up normalization layers other than SPADE
def build_norm_layer(norm_type='instance'):
"""Return a normalization layer
@@ -105,17 +136,12 @@ def build_norm_layer(norm_type='instance'):
"""
if norm_type == 'batch':
norm_layer = functools.partial(
- nn.BatchNorm2D,
- weight_attr=False,
- bias_attr=False)
+ nn.BatchNorm2D, weight_attr=False, bias_attr=False)
elif norm_type == 'syncbatch':
norm_layer = functools.partial(
- nn.SyncBatchNorm,
- weight_attr=False,
- bias_attr=False)
+ nn.SyncBatchNorm, weight_attr=False, bias_attr=False)
elif norm_type == 'instance':
- norm_layer = functools.partial(
- nn.InstanceNorm2D,)
+ norm_layer = functools.partial(nn.InstanceNorm2D, )
elif norm_type == 'spectral':
norm_layer = functools.partial(Spectralnorm)
elif norm_type == 'none':
@@ -127,14 +153,16 @@ def norm_layer(x):
norm_type)
return norm_layer
+
def simam(x, e_lambda=1e-4):
b, c, h, w = x.shape
n = w * h - 1
- x_minus_mu_square = (x - x.mean(axis=[2, 3], keepdim=True)) ** 2
- y = x_minus_mu_square / (4 * (x_minus_mu_square.sum(axis=[2, 3], keepdim=True) / n + e_lambda)) + 0.5
+ x_minus_mu_square = (x - x.mean(axis=[2, 3], keepdim=True))**2
+ y = x_minus_mu_square / (4 * (x_minus_mu_square.sum(
+ axis=[2, 3], keepdim=True) / n + e_lambda)) + 0.5
return x * nn.functional.sigmoid(y)
+
class Dict(dict):
__setattr__ = dict.__setitem__
__getattr__ = dict.__getitem__
-
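The restyled simam above is the parameter-free SimAM attention: each activation is reweighted by a sigmoid of an energy term built from its squared deviation from the per-channel spatial mean. A self-contained usage sketch (the input shape is illustrative):

import paddle
import paddle.nn as nn

def simam(x, e_lambda=1e-4):
    # Restyled copy of the function above, kept here for a runnable demo.
    b, c, h, w = x.shape
    n = w * h - 1
    x_minus_mu_square = (x - x.mean(axis=[2, 3], keepdim=True))**2
    y = x_minus_mu_square / (4 * (x_minus_mu_square.sum(
        axis=[2, 3], keepdim=True) / n + e_lambda)) + 0.5
    return x * nn.functional.sigmoid(y)

x = paddle.rand([2, 16, 32, 32])
print(simam(x).shape)  # [2, 16, 32, 32]; the input shape is preserved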
diff --git a/paddlers/models/ppgan/utils/setup.py b/paddlers/models/ppgan/utils/setup.py
index 78df1092..b7301fcc 100644
--- a/paddlers/models/ppgan/utils/setup.py
+++ b/paddlers/models/ppgan/utils/setup.py
@@ -19,6 +19,7 @@
import random
from .logger import setup_logger
+
def setup(args, cfg):
if args.evaluate_only:
cfg.is_train = False
@@ -44,10 +45,9 @@ def setup(args, cfg):
paddle.set_device('gpu')
else:
paddle.set_device('cpu')
-
+
if args.seed:
paddle.seed(args.seed)
random.seed(args.seed)
- np.random.seed(args.seed)
+ np.random.seed(args.seed)
paddle.framework.random._manual_program_seed(args.seed)
-
diff --git a/paddlers/models/ppgan/utils/visual.py b/paddlers/models/ppgan/utils/visual.py
index 69826346..b0cea537 100644
--- a/paddlers/models/ppgan/utils/visual.py
+++ b/paddlers/models/ppgan/utils/visual.py
@@ -36,8 +36,8 @@ def make_grid(tensor, nrow=8, normalize=False, range=None, scale_each=False):
images separately rather than the (min, max) over all images. Default: ``False``.
"""
if not (isinstance(tensor, paddle.Tensor) or
- (isinstance(tensor, list)
- and all(isinstance(t, paddle.Tensor) for t in tensor))):
+ (isinstance(tensor, list) and
+ all(isinstance(t, paddle.Tensor) for t in tensor))):
raise TypeError('tensor or list of tensors expected, got {}'.format(
type(tensor)))
@@ -92,15 +92,15 @@ def norm_range(t, range):
ymaps = int(math.ceil(float(nmaps) / xmaps))
height, width = int(tensor.shape[2]), int(tensor.shape[3])
num_channels = tensor.shape[1]
- canvas = paddle.zeros((num_channels, height * ymaps, width * xmaps),
- dtype=tensor.dtype)
+ canvas = paddle.zeros(
+ (num_channels, height * ymaps, width * xmaps), dtype=tensor.dtype)
k = 0
for y in irange(ymaps):
for x in irange(xmaps):
if k >= nmaps:
break
- canvas[:, y * height:(y + 1) * height,
- x * width:(x + 1) * width] = tensor[k]
+ canvas[:, y * height:(y + 1) * height, x * width:(x + 1) *
+ width] = tensor[k]
k = k + 1
return canvas
@@ -113,6 +113,7 @@ def tensor2img(input_image, min_max=(-1., 1.), image_num=1, imtype=np.uint8):
image_num (int): the number of images to convert
imtype (type): the desired type of the converted numpy array
"""
+
def processing(img, transpose=True):
""""processing one numpy image.
@@ -152,7 +153,8 @@ def processing(img, transpose=True):
else:
# for more image, log NCHW image
image_numpy = np.stack(
- [processing(im, transpose=False) for im in image_numpy])
+ [processing(
+ im, transpose=False) for im in image_numpy])
else:
# if it is a numpy array, do nothing
diff --git a/paddlers/models/ppseg/core/infer.py b/paddlers/models/ppseg/core/infer.py
index a66288d7..cacdb9dd 100644
--- a/paddlers/models/ppseg/core/infer.py
+++ b/paddlers/models/ppseg/core/infer.py
@@ -64,10 +64,10 @@ def get_reverse_list(ori_shape, transforms):
else:
w = long_edge
h = short_edge
- if op.__class__.__name__ in ['Padding']:
+ if op.__class__.__name__ in ['Pad']:
reverse_list.append(('padding', (h, w)))
w, h = op.target_size[0], op.target_size[1]
- if op.__class__.__name__ in ['PaddingByAspectRatio']:
+ if op.__class__.__name__ in ['PadByAspectRatio']:
reverse_list.append(('padding', (h, w)))
ratio = w / h
if ratio == op.aspect_ratio:
diff --git a/paddlers/models/ppseg/datasets/dataset.py b/paddlers/models/ppseg/datasets/dataset.py
index a9fab0d8..b4faf6a1 100644
--- a/paddlers/models/ppseg/datasets/dataset.py
+++ b/paddlers/models/ppseg/datasets/dataset.py
@@ -49,7 +49,7 @@ class Dataset(paddle.io.Dataset):
import paddlers.models.ppseg.transforms as T
from paddlers.models.ppseg.datasets import Dataset
- transforms = [T.RandomPaddingCrop(crop_size=(512,512)), T.Normalize()]
+ transforms = [T.RandomPadCrop(crop_size=(512,512)), T.Normalize()]
dataset_root = 'dataset_root_path'
train_path = 'train_path'
num_classes = 2
diff --git a/paddlers/models/ppseg/models/__init__.py b/paddlers/models/ppseg/models/__init__.py
index 62b3cc64..4a7d535b 100644
--- a/paddlers/models/ppseg/models/__init__.py
+++ b/paddlers/models/ppseg/models/__init__.py
@@ -54,4 +54,4 @@
from .enet import ENet
from .bisenetv1 import BiseNetV1
from .fastfcn import FastFCN
-from .pfpnnet import PFPNNet
\ No newline at end of file
+from .pfpnnet import PFPNNet
diff --git a/paddlers/models/ppseg/transforms/transforms.py b/paddlers/models/ppseg/transforms/transforms.py
index b5bb6352..5f2b6406 100644
--- a/paddlers/models/ppseg/transforms/transforms.py
+++ b/paddlers/models/ppseg/transforms/transforms.py
@@ -480,7 +480,7 @@ def __call__(self, im, label=None):
@manager.TRANSFORMS.add_component
-class Padding:
+class Pad:
"""
Add bottom-right padding to a raw image or annotation image.
@@ -560,7 +560,7 @@ def __call__(self, im, label=None):
@manager.TRANSFORMS.add_component
-class PaddingByAspectRatio:
+class PadByAspectRatio:
"""
Args:
@@ -597,15 +597,14 @@ def __call__(self, im, label=None):
img_height = int(img_width / self.aspect_ratio)
else:
img_width = int(img_height * self.aspect_ratio)
- padding = Padding(
- (img_width, img_height),
- im_padding_value=self.im_padding_value,
- label_padding_value=self.label_padding_value)
+ padding = Pad((img_width, img_height),
+ im_padding_value=self.im_padding_value,
+ label_padding_value=self.label_padding_value)
return padding(im, label)
@manager.TRANSFORMS.add_component
-class RandomPaddingCrop:
+class RandomPadCrop:
"""
Crop a sub-image from a raw image and annotation image randomly. If the target cropping size
is larger than original image, then the bottom-right padding will be added.
@@ -768,7 +767,7 @@ def __call__(self, im, label=None):
@manager.TRANSFORMS.add_component
-class ScalePadding:
+class ScalePad:
"""
Add center padding to a raw image or annotation image, then scale the
image to target size.
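Since the hunks above rename the padding transforms (Padding to Pad, PaddingByAspectRatio to PadByAspectRatio, RandomPaddingCrop to RandomPadCrop, ScalePadding to ScalePad), existing pipelines need the new names; a hypothetical before/after sketch (the argument values are illustrative):

import paddlers.models.ppseg.transforms as T

transforms = [
    T.RandomPadCrop(crop_size=(512, 512)),  # was T.RandomPaddingCrop
    T.Pad((1024, 1024)),                    # was T.Padding
    T.Normalize(),
]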
diff --git a/paddlers/tasks/__init__.py b/paddlers/tasks/__init__.py
index a835a5cc..ca50e606 100644
--- a/paddlers/tasks/__init__.py
+++ b/paddlers/tasks/__init__.py
@@ -17,4 +17,4 @@
from .change_detector import *
from .classifier import *
from .load_model import load_model
-from .image_restorer import *
\ No newline at end of file
+from .image_restorer import *
diff --git a/paddlers/tasks/change_detector.py b/paddlers/tasks/change_detector.py
index 9035127b..21eac252 100644
--- a/paddlers/tasks/change_detector.py
+++ b/paddlers/tasks/change_detector.py
@@ -29,7 +29,7 @@
import paddlers.utils.logging as logging
import paddlers.models.ppseg as paddleseg
from paddlers.transforms import arrange_transforms
-from paddlers.transforms import ImgDecoder, Resize
+from paddlers.transforms import Resize, decode_image
from paddlers.utils import get_single_card_bs, DisablePrint
from paddlers.utils.checkpoint import seg_pretrain_weights_dict
from .base import BaseModel
@@ -501,9 +501,9 @@ def predict(self, img_file, transforms=None):
Do inference.
Args:
- img_file(List[tuple], Tuple[str or np.ndarray]):
- Tuple of image paths or decoded image data in a BGR format for bi-temporal images, which also could constitute
- a list, meaning all image pairs to be predicted as a mini-batch.
+ img_file (list[tuple] | tuple[str | np.ndarray]):
+ Tuple of image paths or decoded image data for bi-temporal images, which may also be packed into a list,
+ meaning all image pairs are predicted as a mini-batch.
transforms(paddlers.transforms.Compose or None, optional):
Transforms for inputs. If None, the transforms for evaluation process will be used. Defaults to None.
@@ -546,19 +546,24 @@ def predict(self, img_file, transforms=None):
}
return prediction
- def slider_predict(self, img_file, save_dir, block_size, overlap=36, transforms=None):
+ def slider_predict(self,
+ img_file,
+ save_dir,
+ block_size,
+ overlap=36,
+ transforms=None):
"""
Do inference.
Args:
- img_file(List[str]):
+ img_file(list[str]):
List of image paths.
save_dir(str):
Directory to save the output GeoTIFF file.
- block_size(List[int] or Tuple[int], int):
- The size of block.
- overlap(List[int] or Tuple[int], int):
- The overlap between two blocks. Defaults to 36.
+ block_size(list[int] | tuple[int] | int):
+ Size of the block.
+ overlap(list[int] | tuple[int] | int, optional):
+ Overlap between two blocks. Defaults to 36.
transforms(paddlers.transforms.Compose or None, optional):
Transforms for inputs. If None, the transforms for evaluation process will be used. Defaults to None.
"""
@@ -566,7 +571,7 @@ def slider_predict(self, img_file, save_dir, block_size, overlap=36, transforms=
from osgeo import gdal
except:
import gdal
-
+
if len(img_file) != 2:
raise ValueError("`img_file` must be a list of length 2.")
if isinstance(block_size, int):
@@ -574,13 +579,15 @@ def slider_predict(self, img_file, save_dir, block_size, overlap=36, transforms=
elif isinstance(block_size, (tuple, list)) and len(block_size) == 2:
block_size = tuple(block_size)
else:
- raise ValueError("`block_size` must be a tuple/list of length 2 or an integer.")
+ raise ValueError(
+ "`block_size` must be a tuple/list of length 2 or an integer.")
if isinstance(overlap, int):
overlap = (overlap, overlap)
elif isinstance(overlap, (tuple, list)) and len(overlap) == 2:
overlap = tuple(overlap)
else:
- raise ValueError("`overlap` must be a tuple/list of length 2 or an integer.")
+ raise ValueError(
+ "`overlap` must be a tuple/list of length 2 or an integer.")
src1_data = gdal.Open(img_file[0])
src2_data = gdal.Open(img_file[1])
@@ -589,7 +596,8 @@ def slider_predict(self, img_file, save_dir, block_size, overlap=36, transforms=
bands = src1_data.RasterCount
driver = gdal.GetDriverByName("GTiff")
- file_name = osp.splitext(osp.normpath(img_file[0]).split(os.sep)[-1])[0] + ".tif"
+ file_name = osp.splitext(osp.normpath(img_file[0]).split(os.sep)[-1])[
+ 0] + ".tif"
if not osp.exists(save_dir):
os.makedirs(save_dir)
save_file = osp.join(save_dir, file_name)
@@ -607,17 +615,21 @@ def slider_predict(self, img_file, save_dir, block_size, overlap=36, transforms=
xsize = int(width - xoff)
if yoff + ysize > height:
ysize = int(height - yoff)
- im1 = src1_data.ReadAsArray(int(xoff), int(yoff), xsize, ysize).transpose((1, 2, 0))
- im2 = src2_data.ReadAsArray(int(xoff), int(yoff), xsize, ysize).transpose((1, 2, 0))
+ im1 = src1_data.ReadAsArray(
+ int(xoff), int(yoff), xsize, ysize).transpose((1, 2, 0))
+ im2 = src2_data.ReadAsArray(
+ int(xoff), int(yoff), xsize, ysize).transpose((1, 2, 0))
# fill
h, w = im1.shape[:2]
- im1_fill = np.zeros((block_size[1], block_size[0], bands), dtype=im1.dtype)
+ im1_fill = np.zeros(
+ (block_size[1], block_size[0], bands), dtype=im1.dtype)
im2_fill = im1_fill.copy()
im1_fill[:h, :w, :] = im1
im2_fill[:h, :w, :] = im2
im_fill = (im1_fill, im2_fill)
# predict
- pred = self.predict(im_fill, transforms)["label_map"].astype("uint8")
+ pred = self.predict(im_fill,
+ transforms)["label_map"].astype("uint8")
# overlap
rd_block = band.ReadAsArray(int(xoff), int(yoff), xsize, ysize)
mask = (rd_block == pred[:h, :w]) | (rd_block == 255)
@@ -634,13 +646,12 @@ def _preprocess(self, images, transforms, to_tensor=True):
batch_im1, batch_im2 = list(), list()
batch_ori_shape = list()
for im1, im2 in images:
- sample = {'image_t1': im1, 'image_t2': im2}
- if isinstance(sample['image_t1'], str) or \
- isinstance(sample['image_t2'], str):
- sample = ImgDecoder(to_rgb=False)(sample)
- ori_shape = sample['image'].shape[:2]
- else:
- ori_shape = im1.shape[:2]
+ if isinstance(im1, str) or isinstance(im2, str):
+ im1 = decode_image(im1, to_rgb=False)
+ im2 = decode_image(im2, to_rgb=False)
+ ori_shape = im1.shape[:2]
+ # XXX: sample does not contain 'image_t1' and 'image_t2'.
+ sample = {'image': im1, 'image2': im2}
im1, im2 = transforms(sample)[:2]
batch_im1.append(im1)
batch_im2.append(im2)
@@ -679,7 +690,7 @@ def get_transforms_shape_info(batch_ori_shape, transforms):
scale = float(op.long_size) / float(im_long_size)
h = int(round(h * scale))
w = int(round(w * scale))
- elif op.__class__.__name__ == 'Padding':
+ elif op.__class__.__name__ == 'Pad':
if op.target_size:
target_h, target_w = op.target_size
else:
diff --git a/paddlers/tasks/classifier.py b/paddlers/tasks/classifier.py
index 84564b69..c29d73f0 100644
--- a/paddlers/tasks/classifier.py
+++ b/paddlers/tasks/classifier.py
@@ -33,7 +33,7 @@
from paddlers.models.ppcls.loss import build_loss
from paddlers.models.ppcls.data.postprocess import build_postprocess
from paddlers.utils.checkpoint import cls_pretrain_weights_dict
-from paddlers.transforms import ImgDecoder, Resize
+from paddlers.transforms import Resize, decode_image
__all__ = [
"ResNet50_vd", "MobileNetV3_small_x1_0", "HRNet_W18_C", "CondenseNetV2_b"
@@ -410,9 +410,9 @@ def predict(self, img_file, transforms=None):
Do inference.
Args:
- img_file(List[np.ndarray or str], str or np.ndarray):
- Image path or decoded image data in a BGR format, which also could constitute a list,
- meaning all images to be predicted as a mini-batch.
+ img_file(list[np.ndarray | str] | str | np.ndarray):
+ Image path or decoded image data, which may also be packed into a list, meaning all images are
+ predicted as a mini-batch.
transforms(paddlers.transforms.Compose or None, optional):
Transforms for inputs. If None, the transforms for evaluation process will be used. Defaults to None.
@@ -465,10 +465,10 @@ def _preprocess(self, images, transforms, to_tensor=True):
batch_im = list()
batch_ori_shape = list()
for im in images:
+ if isinstance(im, str):
+ im = decode_image(im, to_rgb=False)
+ ori_shape = im.shape[:2]
sample = {'image': im}
- if isinstance(sample['image'], str):
- sample = ImgDecoder(to_rgb=False)(sample)
- ori_shape = sample['image'].shape[:2]
im = transforms(sample)
batch_im.append(im)
batch_ori_shape.append(ori_shape)
@@ -504,7 +504,7 @@ def get_transforms_shape_info(batch_ori_shape, transforms):
scale = float(op.long_size) / float(im_long_size)
h = int(round(h * scale))
w = int(round(w * scale))
- elif op.__class__.__name__ == 'Padding':
+ elif op.__class__.__name__ == 'Pad':
if op.target_size:
target_h, target_w = op.target_size
else:
diff --git a/paddlers/tasks/object_detector.py b/paddlers/tasks/object_detector.py
index 0b8c74d3..f6ac956e 100644
--- a/paddlers/tasks/object_detector.py
+++ b/paddlers/tasks/object_detector.py
@@ -27,9 +27,10 @@
from paddlers.models.ppdet.modeling.proposal_generator.target_layer import BBoxAssigner, MaskAssigner
import paddlers
import paddlers.utils.logging as logging
-from paddlers.transforms.operators import _NormalizeBox, _PadBox, _BboxXYXY2XYWH, Resize, Padding
+from paddlers.transforms import decode_image
+from paddlers.transforms.operators import _NormalizeBox, _PadBox, _BboxXYXY2XYWH, Resize, Pad
from paddlers.transforms.batch_operators import BatchCompose, BatchRandomResize, BatchRandomResizeByShort, \
- _BatchPadding, _Gt2YoloTarget
+ _BatchPad, _Gt2YoloTarget
from paddlers.transforms import arrange_transforms
from .base import BaseModel
from .utils.det_metrics import VOCMetric, COCOMetric
@@ -37,8 +38,7 @@
from paddlers.utils.checkpoint import det_pretrain_weights_dict
__all__ = [
- "YOLOv3", "FasterRCNN", "PPYOLO", "PPYOLOTiny", "PPYOLOv2", "MaskRCNN",
- "PicoDet"
+ "YOLOv3", "FasterRCNN", "PPYOLO", "PPYOLOTiny", "PPYOLOv2", "MaskRCNN"
]
@@ -511,9 +511,9 @@ def predict(self, img_file, transforms=None):
"""
Do inference.
Args:
- img_file(List[np.ndarray or str], str or np.ndarray):
- Image path or decoded image data in a BGR format, which also could constitute a list,
- meaning all images to be predicted as a mini-batch.
+ img_file(list[np.ndarray | str] | str | np.ndarray):
+ Image path or decoded image data, which may also be packed into a list, meaning all images are
+ predicted as a mini-batch.
transforms(paddlers.transforms.Compose or None, optional):
Transforms for inputs. If None, the transforms for evaluation process will be used. Defaults to None.
Returns:
@@ -549,8 +549,11 @@ def _preprocess(self, images, transforms, to_tensor=True):
model_type=self.model_type, transforms=transforms, mode='test')
batch_samples = list()
for im in images:
+ if isinstance(im, str):
+ im = decode_image(im, to_rgb=False)
sample = {'image': im}
- batch_samples.append(transforms(sample))
+ sample = transforms(sample)
+ batch_samples.append(sample)
batch_transforms = self._compose_batch_transform(transforms, 'test')
batch_samples = batch_transforms(batch_samples)
if to_tensor:
@@ -757,7 +760,7 @@ def __init__(self,
model_name='PicoDet', num_classes=num_classes, **params)
def _compose_batch_transform(self, transforms, mode='train'):
- default_batch_transforms = [_BatchPadding(pad_to_stride=32)]
+ default_batch_transforms = [_BatchPad(pad_to_stride=32)]
if mode == 'eval':
collate_batch = True
else:
@@ -1005,7 +1008,7 @@ def __init__(self,
def _compose_batch_transform(self, transforms, mode='train'):
if mode == 'train':
default_batch_transforms = [
- _BatchPadding(pad_to_stride=-1), _NormalizeBox(),
+ _BatchPad(pad_to_stride=-1), _NormalizeBox(),
_PadBox(getattr(self, 'num_max_boxes', 50)), _BboxXYXY2XYWH(),
_Gt2YoloTarget(
anchor_masks=self.anchor_masks,
@@ -1015,7 +1018,7 @@ def _compose_batch_transform(self, transforms, mode='train'):
num_classes=self.num_classes)
]
else:
- default_batch_transforms = [_BatchPadding(pad_to_stride=-1)]
+ default_batch_transforms = [_BatchPad(pad_to_stride=-1)]
if mode == 'eval' and self.metric == 'voc':
collate_batch = False
else:
@@ -1362,11 +1365,11 @@ def train(self,
def _compose_batch_transform(self, transforms, mode='train'):
if mode == 'train':
default_batch_transforms = [
- _BatchPadding(pad_to_stride=32 if self.with_fpn else -1)
+ _BatchPad(pad_to_stride=32 if self.with_fpn else -1)
]
else:
default_batch_transforms = [
- _BatchPadding(pad_to_stride=32 if self.with_fpn else -1)
+ _BatchPad(pad_to_stride=32 if self.with_fpn else -1)
]
custom_batch_transforms = []
for i, op in enumerate(transforms.transforms):
@@ -1408,7 +1411,7 @@ def _fix_transforms_shape(self, image_shape):
self.test_transforms.transforms[resize_op_idx] = Resize(
target_size=image_shape, keep_ratio=True, interp='CUBIC')
self.test_transforms.transforms.append(
- Padding(im_padding_value=[0., 0., 0.]))
+ Pad(im_padding_value=[0., 0., 0.]))
def _get_test_inputs(self, image_shape):
if image_shape is not None:
@@ -1418,7 +1421,7 @@ def _get_test_inputs(self, image_shape):
image_shape = [None, 3, -1, -1]
if self.with_fpn:
self.test_transforms.transforms.append(
- Padding(im_padding_value=[0., 0., 0.]))
+ Pad(im_padding_value=[0., 0., 0.]))
self.fixed_input_shape = image_shape
return self._define_input_spec(image_shape)
@@ -2187,11 +2190,11 @@ def train(self,
def _compose_batch_transform(self, transforms, mode='train'):
if mode == 'train':
default_batch_transforms = [
- _BatchPadding(pad_to_stride=32 if self.with_fpn else -1)
+ _BatchPad(pad_to_stride=32 if self.with_fpn else -1)
]
else:
default_batch_transforms = [
- _BatchPadding(pad_to_stride=32 if self.with_fpn else -1)
+ _BatchPad(pad_to_stride=32 if self.with_fpn else -1)
]
custom_batch_transforms = []
for i, op in enumerate(transforms.transforms):
@@ -2233,7 +2236,7 @@ def _fix_transforms_shape(self, image_shape):
self.test_transforms.transforms[resize_op_idx] = Resize(
target_size=image_shape, keep_ratio=True, interp='CUBIC')
self.test_transforms.transforms.append(
- Padding(im_padding_value=[0., 0., 0.]))
+ Pad(im_padding_value=[0., 0., 0.]))
def _get_test_inputs(self, image_shape):
if image_shape is not None:
@@ -2243,7 +2246,7 @@ def _get_test_inputs(self, image_shape):
image_shape = [None, 3, -1, -1]
if self.with_fpn:
self.test_transforms.transforms.append(
- Padding(im_padding_value=[0., 0., 0.]))
+ Pad(im_padding_value=[0., 0., 0.]))
self.fixed_input_shape = image_shape
return self._define_input_spec(image_shape)
diff --git a/paddlers/tasks/segmenter.py b/paddlers/tasks/segmenter.py
index 32b7dd0a..f55c772e 100644
--- a/paddlers/tasks/segmenter.py
+++ b/paddlers/tasks/segmenter.py
@@ -32,7 +32,7 @@
from .base import BaseModel
from .utils import seg_metrics as metrics
from paddlers.utils.checkpoint import seg_pretrain_weights_dict
-from paddlers.transforms import ImgDecoder, Resize
+from paddlers.transforms import Resize, decode_image
__all__ = ["UNet", "DeepLabV3P", "FastSCNN", "HRNet", "BiSeNetV2", "FarSeg"]
@@ -478,9 +478,9 @@ def predict(self, img_file, transforms=None):
Do inference.
Args:
- img_file(List[np.ndarray or str], str or np.ndarray):
- Image path or decoded image data in a BGR format, which also could constitute a list,
- meaning all images to be predicted as a mini-batch.
+ img_file(list[np.ndarray | str] | str | np.ndarray):
+ Image path or decoded image data, which may also be packed into a list, meaning all images are
+ predicted as a mini-batch.
transforms(paddlers.transforms.Compose or None, optional):
Transforms for inputs. If None, the transforms for evaluation process will be used. Defaults to None.
@@ -519,7 +519,12 @@ def predict(self, img_file, transforms=None):
}
return prediction
- def slider_predict(self, img_file, save_dir, block_size, overlap=36, transforms=None):
+ def slider_predict(self,
+ img_file,
+ save_dir,
+ block_size,
+ overlap=36,
+ transforms=None):
"""
Do inference.
Args:
@@ -528,10 +533,10 @@ def slider_predict(self, img_file, save_dir, block_size, overlap=36, transforms=
Image path.
save_dir(str):
Directory to save the output GeoTIFF file.
- block_size(List[int] or Tuple[int], int):
- The size of block.
- overlap(List[int] or Tuple[int], int):
- The overlap between two blocks. Defaults to 36.
+ block_size(list[int] | tuple[int] | int):
+ Size of the block.
+ overlap(list[int] | tuple[int] | int, optional):
+ Overlap between two blocks. Defaults to 36.
transforms(paddlers.transforms.Compose or None, optional):
Transforms for inputs. If None, the transforms for evaluation process will be used. Defaults to None.
"""
@@ -539,19 +544,21 @@ def slider_predict(self, img_file, save_dir, block_size, overlap=36, transforms=
from osgeo import gdal
except:
import gdal
-
+
if isinstance(block_size, int):
block_size = (block_size, block_size)
elif isinstance(block_size, (tuple, list)) and len(block_size) == 2:
block_size = tuple(block_size)
else:
- raise ValueError("`block_size` must be a tuple/list of length 2 or an integer.")
+ raise ValueError(
+ "`block_size` must be a tuple/list of length 2 or an integer.")
if isinstance(overlap, int):
overlap = (overlap, overlap)
elif isinstance(overlap, (tuple, list)) and len(overlap) == 2:
overlap = tuple(overlap)
else:
- raise ValueError("`overlap` must be a tuple/list of length 2 or an integer.")
+ raise ValueError(
+ "`overlap` must be a tuple/list of length 2 or an integer.")
src_data = gdal.Open(img_file)
width = src_data.RasterXSize
@@ -559,7 +566,8 @@ def slider_predict(self, img_file, save_dir, block_size, overlap=36, transforms=
bands = src_data.RasterCount
driver = gdal.GetDriverByName("GTiff")
- file_name = osp.splitext(osp.normpath(img_file).split(os.sep)[-1])[0] + ".tif"
+ file_name = osp.splitext(osp.normpath(img_file).split(os.sep)[-1])[
+ 0] + ".tif"
if not osp.exists(save_dir):
os.makedirs(save_dir)
save_file = osp.join(save_dir, file_name)
@@ -577,13 +585,16 @@ def slider_predict(self, img_file, save_dir, block_size, overlap=36, transforms=
xsize = int(width - xoff)
if yoff + ysize > height:
ysize = int(height - yoff)
- im = src_data.ReadAsArray(int(xoff), int(yoff), xsize, ysize).transpose((1, 2, 0))
+ im = src_data.ReadAsArray(int(xoff), int(yoff), xsize,
+ ysize).transpose((1, 2, 0))
# fill
h, w = im.shape[:2]
- im_fill = np.zeros((block_size[1], block_size[0], bands), dtype=im.dtype)
+ im_fill = np.zeros(
+ (block_size[1], block_size[0], bands), dtype=im.dtype)
im_fill[:h, :w, :] = im
# predict
- pred = self.predict(im_fill, transforms)["label_map"].astype("uint8")
+ pred = self.predict(im_fill,
+ transforms)["label_map"].astype("uint8")
# overlap
rd_block = band.ReadAsArray(int(xoff), int(yoff), xsize, ysize)
mask = (rd_block == pred[:h, :w]) | (rd_block == 255)
@@ -600,10 +611,10 @@ def _preprocess(self, images, transforms, to_tensor=True):
batch_im = list()
batch_ori_shape = list()
for im in images:
+ if isinstance(im, str):
+ im = decode_image(im, to_rgb=False)
+ ori_shape = im.shape[:2]
sample = {'image': im}
- if isinstance(sample['image'], str):
- sample = ImgDecoder(to_rgb=False)(sample)
- ori_shape = sample['image'].shape[:2]
im = transforms(sample)[0]
batch_im.append(im)
batch_ori_shape.append(ori_shape)
@@ -639,7 +650,7 @@ def get_transforms_shape_info(batch_ori_shape, transforms):
scale = float(op.long_size) / float(im_long_size)
h = int(round(h * scale))
w = int(round(w * scale))
- elif op.__class__.__name__ == 'Padding':
+ elif op.__class__.__name__ == 'Pad':
if op.target_size:
target_h, target_w = op.target_size
else:
diff --git a/paddlers/tasks/slim/prune.py b/paddlers/tasks/slim/prune.py
index 9792d0ff..10df7fec 100644
--- a/paddlers/tasks/slim/prune.py
+++ b/paddlers/tasks/slim/prune.py
@@ -41,7 +41,7 @@ def _pruner_template_input(sample, model_type):
def sensitive_prune(pruner, pruned_flops, skip_vars=[], align=None):
- # skip depthwise convolutions
+ # Skip depthwise convolutions
for layer in pruner.model.sublayers():
if isinstance(layer, paddle.nn.layer.conv.Conv2D) and layer._groups > 1:
for param in layer.parameters(include_sublayers=False):
diff --git a/paddlers/tasks/utils/det_metrics/coco_utils.py b/paddlers/tasks/utils/det_metrics/coco_utils.py
index c4a024fa..f62e8bf5 100644
--- a/paddlers/tasks/utils/det_metrics/coco_utils.py
+++ b/paddlers/tasks/utils/det_metrics/coco_utils.py
@@ -35,6 +35,7 @@ def get_infer_results(outs, catid, bias=0):
For example, bbox result is a list and each element contains
image_id, category_id, bbox and score.
"""
+
if outs is None or len(outs) == 0:
raise ValueError(
'The number of valid detection results is zero. Please use a reasonable model and check the input data.'
@@ -78,6 +79,7 @@ def cocoapi_eval(anns,
max_dets (tuple): COCO evaluation maxDets.
classwise (bool): Whether to compute per-category AP and draw P-R curves.
"""
+
assert coco_gt is not None or anno_file is not None
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval
@@ -220,19 +222,19 @@ def loadRes(coco_obj, anns):
def makeplot(rs, ps, outDir, class_name, iou_type):
- """针对某个特定类别,绘制不同评估要求下的准确率和召回率。
- 绘制结果说明参考COCODataset官网给出分析工具说明https://cocodataset.org/#detection-eval。
-
- Refer to https://github.com/open-mmlab/mmdetection/blob/master/tools/analysis_tools/coco_error_analysis.py#L13
+ """
+ For a specific category, plot the precision and recall under different evaluation settings.
+ See the analysis-tool notes on the COCO dataset website (https://cocodataset.org/#detection-eval) for how to read the plots.
- Args:
- rs (np.array): Recall values computed under different confidence thresholds.
- ps (np.array): Precision values computed under different confidence thresholds. The values of ps and rs
- at the same position are computed under the same confidence threshold.
- outDir (str): Path to save the plots.
- class_name (str): Name of the category.
- iou_type (str): Type of IoU computation; set to 'bbox' for detection boxes and 'segm' for pixel-level segmentation results.
+ Refer to https://github.com/open-mmlab/mmdetection/blob/master/tools/analysis_tools/coco_error_analysis.py#L13
+ Args:
+ rs (np.array): Recall values computed under different confidence thresholds.
+ ps (np.array): Precision values computed under different confidence thresholds. The values of ps and rs
+ at the same position are computed under the same confidence threshold.
+ outDir (str): Path to save the plots.
+ class_name (str): Name of the category.
+ iou_type (str): Type of IoU computation; set to 'bbox' for detection boxes and 'segm' for pixel-level segmentation results.
"""
import matplotlib.pyplot as plt
@@ -276,21 +278,22 @@ def makeplot(rs, ps, outDir, class_name, iou_type):
def analyze_individual_category(k, cocoDt, cocoGt, catId, iou_type, areas=None):
- """针对某个特定类别,分析忽略亚类混淆和类别混淆时的准确率。
+ """
+ For a specific category, analyze the precision when confusion between subcategories and confusion between categories are ignored.
- Refer to https://github.com/open-mmlab/mmdetection/blob/master/tools/analysis_tools/coco_error_analysis.py#L174
+ Refer to https://github.com/open-mmlab/mmdetection/blob/master/tools/analysis_tools/coco_error_analysis.py#L174
- Args:
- k (int): Index of the category to analyze.
- cocoDt (pycocotools.coco.COCO): Prediction results stored as a COCO object.
- cocoGt (pycocotools.coco.COCO): Ground truth stored as a COCO object.
- catId (int): Category id of the category to analyze in the dataset.
- iou_type (str): Type of IoU computation; set to 'bbox' for detection boxes and 'segm' for pixel-level segmentation results.
+ Args:
+ k (int): Index of the category to analyze.
+ cocoDt (pycocotools.coco.COCO): Prediction results stored as a COCO object.
+ cocoGt (pycocotools.coco.COCO): Ground truth stored as a COCO object.
+ catId (int): Category id of the category to analyze in the dataset.
+ iou_type (str): Type of IoU computation; set to 'bbox' for detection boxes and 'segm' for pixel-level segmentation results.
- Returns:
- int:
- dict: Contains the keys 'ps_supercategory' and 'ps_allcategory'. The value of 'ps_supercategory' is the precision when
- confusion between subcategories is ignored, and the value of 'ps_allcategory' is the precision when confusion between categories is ignored.
+ Returns:
+ int:
+ dict: Contains the keys 'ps_supercategory' and 'ps_allcategory'. The value of 'ps_supercategory' is the precision when
+ confusion between subcategories is ignored, and the value of 'ps_allcategory' is the precision when confusion between categories is ignored.
"""
@@ -362,23 +365,23 @@ def coco_error_analysis(eval_details_file=None,
pred_bbox=None,
pred_mask=None,
save_dir='./output'):
- """逐个分析模型预测错误的原因,并将分析结果以图表的形式展示。
- 分析结果说明参考COCODataset官网给出分析工具说明https://cocodataset.org/#detection-eval。
-
- Refer to https://github.com/open-mmlab/mmdetection/blob/master/tools/analysis_tools/coco_error_analysis.py
-
- Args:
- eval_details_file (str): Path where the model evaluation results are saved, containing the ground truth and prediction results.
- gt (list): Ground truth of the dataset. Defaults to None.
- pred_bbox (list): Predicted bounding boxes of the model on the dataset. Defaults to None.
- pred_mask (list): Predicted masks of the model on the dataset. Defaults to None.
- save_dir (str): Path to save the visualization results. Defaults to './output'.
+ """
+ Analyze the causes of the model's prediction errors one by one and present the results as plots.
+ See the analysis-tool notes on the COCO dataset website (https://cocodataset.org/#detection-eval) for how to read the results.
- Note:
- eval_details_file has higher priority: as long as eval_details_file is not None,
- the ground truth and prediction results are extracted from eval_details_file for analysis.
- Only when eval_details_file is None are gt, pred_bbox, and pred_mask used for the analysis.
+ Refer to https://github.com/open-mmlab/mmdetection/blob/master/tools/analysis_tools/coco_error_analysis.py
+ Args:
+ eval_details_file (str): Path where the model evaluation results are saved, containing the ground truth and prediction results.
+ gt (list): Ground truth of the dataset. Defaults to None.
+ pred_bbox (list): Predicted bounding boxes of the model on the dataset. Defaults to None.
+ pred_mask (list): Predicted masks of the model on the dataset. Defaults to None.
+ save_dir (str): Path to save the visualization results. Defaults to './output'.
+
+ Note:
+ eval_details_file has higher priority: as long as eval_details_file is not None,
+ the ground truth and prediction results are extracted from eval_details_file for analysis.
+ Only when eval_details_file is None are gt, pred_bbox, and pred_mask used for the analysis.
"""
import multiprocessing as mp
diff --git a/paddlers/tasks/utils/visualize.py b/paddlers/tasks/utils/visualize.py
index fb76f968..2c769954 100644
--- a/paddlers/tasks/utils/visualize.py
+++ b/paddlers/tasks/utils/visualize.py
@@ -25,7 +25,7 @@
def visualize_detection(image, result, threshold=0.5, save_dir='./',
color=None):
"""
- Visualize bbox and mask results
+ Visualize bbox and mask results
"""
if isinstance(image, np.ndarray):
@@ -48,6 +48,7 @@ def visualize_segmentation(image, result, weight=0.6, save_dir='./',
color=None):
"""
Convert segment result to color image, and save added image.
+
Args:
image: the path of the original image
result: the prediction result of the image
@@ -55,6 +56,7 @@ def visualize_segmentation(image, result, weight=0.6, save_dir='./',
save_dir: the directory for saving visual image
color: the list of a BGR-mode color for each label.
"""
+
label_map = result['label_map'].astype("uint8")
color_map = get_color_map_list(256)
if color is not None:
@@ -104,13 +106,16 @@ def visualize_segmentation(image, result, weight=0.6, save_dir='./',
def get_color_map_list(num_classes):
- """ Returns the color map for visualizing the segmentation mask,
- which can support arbitrary number of classes.
+ """
+ Returns the color map for visualizing the segmentation mask, which can support an arbitrary number of classes.
+
Args:
num_classes: Number of classes
+
Returns:
The color map
"""
+
color_map = num_classes * [0, 0, 0]
for i in range(0, num_classes):
j = 0
diff --git a/paddlers/tools/yolo_cluster.py b/paddlers/tools/yolo_cluster.py
index 9efbdcf0..0d7941eb 100644
--- a/paddlers/tools/yolo_cluster.py
+++ b/paddlers/tools/yolo_cluster.py
@@ -28,6 +28,7 @@ class BaseAnchorCluster(object):
def __init__(self, num_anchors, cache, cache_path):
"""
Base Anchor Cluster
+
Args:
num_anchors (int): number of clusters
cache (bool): whether using cache
diff --git a/paddlers/transforms/__init__.py b/paddlers/transforms/__init__.py
index 0c10e7de..c5ad12e8 100644
--- a/paddlers/transforms/__init__.py
+++ b/paddlers/transforms/__init__.py
@@ -12,11 +12,45 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+import copy
+import os.path as osp
+
from .operators import *
-from .batch_operators import BatchRandomResize, BatchRandomResizeByShort, _BatchPadding
+from .batch_operators import BatchRandomResize, BatchRandomResizeByShort, _BatchPad
from paddlers import transforms as T
+def decode_image(im_path,
+ to_rgb=True,
+ to_uint8=True,
+ decode_bgr=True,
+ decode_sar=True):
+ """
+ Decode an image.
+
+ Args:
+ im_path (str): Path of the image to decode.
+ to_rgb (bool, optional): If True, convert input image(s) from BGR format to RGB format. Defaults to True.
+ to_uint8 (bool, optional): If True, quantize and convert decoded image(s) to uint8 type. Defaults to True.
+ decode_bgr (bool, optional): If True, automatically interpret a non-geo image (e.g. jpeg images) as a BGR image.
+ Defaults to True.
+ decode_sar (bool, optional): If True, automatically interpret a two-dimensional (single-band) geo image (e.g.
+ geotiff images) as a SAR image. Defaults to True.
+ """
+
+ # Do a presence check. `osp.exists` assumes `im_path` is a path-like object.
+ if not osp.exists(im_path):
+ raise ValueError(f"{im_path} does not exist!")
+ decoder = T.DecodeImg(
+ to_rgb=to_rgb,
+ to_uint8=to_uint8,
+ decode_bgr=decode_bgr,
+ decode_sar=decode_sar)
+ # Deepcopy to avoid inplace modification
+ sample = {'image': copy.deepcopy(im_path)}
+ sample = decoder(sample)
+ return sample['image']
+
+
def arrange_transforms(model_type, transforms, mode='train'):
# Append the arrange operator to transforms
if model_type == 'segmenter':
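A usage sketch for the new decode_image helper (the file name is a placeholder); it mirrors how the task modules in this diff call it:

from paddlers.transforms import decode_image

# decode_image() raises ValueError if the path does not exist.
im = decode_image("demo.tif", to_rgb=False)  # np.ndarray in HWC layout
print(im.shape, im.dtype)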
diff --git a/paddlers/transforms/batch_operators.py b/paddlers/transforms/batch_operators.py
index d782931d..c2496c58 100644
--- a/paddlers/transforms/batch_operators.py
+++ b/paddlers/transforms/batch_operators.py
@@ -74,7 +74,7 @@ class BatchRandomResize(Transform):
Attention: If interp is 'RANDOM', the interpolation method will be chosen randomly.
Args:
- target_sizes (List[int], List[list or tuple] or Tuple[list or tuple]):
+ target_sizes (list[int] | list[list | tuple] | tuple[list | tuple]):
Multiple target sizes, each target size is an int or list/tuple of length 2.
interp ({'NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM'}, optional):
Interpolation method of resize. Defaults to 'LINEAR'.
@@ -93,7 +93,7 @@ def __init__(self, target_sizes, interp='NEAREST'):
interp_dict.keys()))
self.interp = interp
assert isinstance(target_sizes, list), \
- "target_size must be List"
+ "target_size must be a list."
for i, item in enumerate(target_sizes):
if isinstance(item, int):
target_sizes[i] = (item, item)
@@ -113,7 +113,7 @@ class BatchRandomResizeByShort(Transform):
Attention: If interp is 'RANDOM', the interpolation method will be chosen randomly.
Args:
- short_sizes (List[int], Tuple[int]): Target sizes of the shorter side of the image(s).
+ short_sizes (list[int] | tuple[int]): Target sizes of the shorter side of the image(s).
max_size (int, optional): The upper bound of longer side of the image(s).
If max_size is -1, no upper bound is applied. Defaults to -1.
interp ({'NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM'}, optional):
@@ -134,7 +134,7 @@ def __init__(self, short_sizes, max_size=-1, interp='NEAREST'):
interp_dict.keys()))
self.interp = interp
assert isinstance(short_sizes, list), \
- "short_sizes must be List"
+ "short_sizes must be a list."
self.short_sizes = short_sizes
self.max_size = max_size
@@ -149,9 +149,9 @@ def __call__(self, samples):
return samples
-class _BatchPadding(Transform):
+class _BatchPad(Transform):
def __init__(self, pad_to_stride=0):
- super(_BatchPadding, self).__init__()
+ super(_BatchPad, self).__init__()
self.pad_to_stride = pad_to_stride
def __call__(self, samples):
diff --git a/paddlers/transforms/functions.py b/paddlers/transforms/functions.py
index 11200bac..68c59a67 100644
--- a/paddlers/transforms/functions.py
+++ b/paddlers/transforms/functions.py
@@ -12,9 +12,9 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import cv2
import copy
+import cv2
import numpy as np
import shapely.ops
from shapely.geometry import Polygon, MultiPolygon, GeometryCollection
@@ -126,45 +126,52 @@ def img_flip(im, method=0):
"""
if not len(im.shape) >= 2:
raise ValueError("Shape of image should 2d, 3d or more")
- if method==0 or method=='h':
+ if method == 0 or method == 'h':
return horizontal_flip(im)
- elif method==1 or method=='v':
+ elif method == 1 or method == 'v':
return vertical_flip(im)
- elif method==2 or method=='hv':
+ elif method == 2 or method == 'hv':
return hv_flip(im)
- elif method==3 or method=='rt2lb' or method=='dia':
+ elif method == 3 or method == 'rt2lb' or method == 'dia':
return rt2lb_flip(im)
- elif method==4 or method=='lt2rb' or method=='adia':
+ elif method == 4 or method == 'lt2rb' or method == 'adia':
return lt2rb_flip(im)
else:
return im
+
def horizontal_flip(im):
im = im[:, ::-1, ...]
return im
+
def vertical_flip(im):
im = im[::-1, :, ...]
return im
+
def hv_flip(im):
im = im[::-1, ::-1, ...]
return im
+
def rt2lb_flip(im):
axs_list = list(range(len(im.shape)))
axs_list[:2] = [1, 0]
im = im.transpose(axs_list)
return im
+
def lt2rb_flip(im):
axs_list = list(range(len(im.shape)))
axs_list[:2] = [1, 0]
im = im[::-1, ::-1, ...].transpose(axs_list)
return im
+
# endregion
+
# region rotation
def img_simple_rotate(im, method=0):
"""
@@ -223,30 +230,35 @@ def img_simple_rotate(im, method=0):
"""
if not len(im.shape) >= 2:
raise ValueError("Shape of image should 2d, 3d or more")
- if method==0 or method==90:
+ if method == 0 or method == 90:
return rot_90(im)
- elif method==1 or method==180:
+ elif method == 1 or method == 180:
return rot_180(im)
- elif method==2 or method==270:
+ elif method == 2 or method == 270:
return rot_270(im)
else:
return im
+
def rot_90(im):
axs_list = list(range(len(im.shape)))
axs_list[:2] = [1, 0]
im = im[::-1, :, ...].transpose(axs_list)
return im
+
def rot_180(im):
im = im[::-1, ::-1, ...]
return im
+
def rot_270(im):
axs_list = list(range(len(im.shape)))
axs_list[:2] = [1, 0]
im = im[:, ::-1, ...].transpose(axs_list)
return im
+
+
# endregion
@@ -477,15 +489,16 @@ def select_bands(im, band_list=[1, 2, 3]):
return ima
-def de_haze(im, gamma=False):
- """ Priori defogging of dark channel. (Just RGB)
+def dehaze(im, gamma=False):
+ """
+ Single image haze removal using dark channel prior.
Args:
- im (np.ndarray): The image.
+ im (np.ndarray): Input image.
gamma (bool, optional): Use gamma correction or not. Defaults to False.
Returns:
- np.ndarray: The image after defogged.
+ np.ndarray: The dehazed image.
"""
def _guided_filter(I, p, r, eps):
@@ -501,7 +514,7 @@ def _guided_filter(I, p, r, eps):
m_b = cv2.boxFilter(b, -1, (r, r))
return m_a * I + m_b
- def _de_fog(im, r, w, maxatmo_mask, eps):
+ def _dehaze(im, r, w, maxatmo_mask, eps):
# im is RGB and range[0, 1]
atmo_mask = np.min(im, 2)
dark_channel = cv2.erode(atmo_mask, np.ones((15, 15)))
@@ -519,7 +532,7 @@ def _de_fog(im, r, w, maxatmo_mask, eps):
if np.max(im) > 1:
im = im / 255.
result = np.zeros(im.shape)
- mask_img, atmo_illum = _de_fog(
+ mask_img, atmo_illum = _dehaze(
im, r=81, w=0.95, maxatmo_mask=0.80, eps=1e-8)
for k in range(3):
result[:, :, k] = (im[:, :, k] - mask_img) / (1 - mask_img / atmo_illum)
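A usage sketch for the renamed dehaze (the input path is a placeholder; per the inner comment, the helper works on RGB data):

import cv2
from paddlers.transforms.functions import dehaze

im = cv2.imread("hazy.jpg")[:, :, ::-1]  # placeholder path; BGR -> RGB
out = dehaze(im, gamma=False)            # dark-channel-prior haze removal
print(out.shape, out.dtype)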
@@ -534,11 +547,11 @@ def match_histograms(im, ref):
Match the cumulative histogram of one image to another.
Args:
- im (np.ndarray): The input image.
- ref (np.ndarray): The reference image to match histogram of. `ref` must have the same number of channels as `im`.
+ im (np.ndarray): Input image.
+ ref (np.ndarray): Reference image to match histogram of. `ref` must have the same number of channels as `im`.
Returns:
- np.ndarray: The transformed input image.
+ np.ndarray: Transformed input image.
Raises:
ValueError: When the number of channels of `ref` differs from that of `im`.
@@ -553,14 +566,14 @@ def match_by_regression(im, ref, pif_loc=None):
Match the brightness values of two images using a linear regression method.
Args:
- im (np.ndarray): The input image.
- ref (np.ndarray): The reference image to match. `ref` must have the same shape as `im`.
- pif_loc (tuple|None, optional): The spatial locations where pseudo-invariant features (PIFs) are obtained. If
+ im (np.ndarray): Input image.
+ ref (np.ndarray): Reference image to match. `ref` must have the same shape as `im`.
+ pif_loc (tuple|None, optional): Spatial locations where pseudo-invariant features (PIFs) are obtained. If
`pif_loc` is set to None, all pixels in the image will be used as training samples for the regression model.
In other cases, `pif_loc` should be a tuple of np.ndarrays. Default: None.
Returns:
- np.ndarray: The transformed input image.
+ np.ndarray: Transformed input image.
Raises:
ValueError: When the shape of `ref` differs from that of `im`.
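The two radiometric-matching helpers documented above can be exercised on random data; a minimal sketch:

import numpy as np
from paddlers.transforms.functions import match_histograms, match_by_regression

im = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)
ref = np.random.randint(0, 256, (64, 64, 3), dtype=np.uint8)

matched_h = match_histograms(im, ref)     # cumulative-histogram matching
matched_r = match_by_regression(im, ref)  # all pixels used as PIFs by default
print(matched_h.shape, matched_r.shape)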
diff --git a/paddlers/transforms/operators.py b/paddlers/transforms/operators.py
index fad74a43..d2091aca 100644
--- a/paddlers/transforms/operators.py
+++ b/paddlers/transforms/operators.py
@@ -32,12 +32,12 @@
import paddlers
from .functions import normalize, horizontal_flip, permute, vertical_flip, center_crop, is_poly, \
horizontal_flip_poly, horizontal_flip_rle, vertical_flip_poly, vertical_flip_rle, crop_poly, \
- crop_rle, expand_poly, expand_rle, resize_poly, resize_rle, de_haze, select_bands, \
+ crop_rle, expand_poly, expand_rle, resize_poly, resize_rle, dehaze, select_bands, \
to_intensity, to_uint8, img_flip, img_simple_rotate
__all__ = [
"Compose",
- "ImgDecoder",
+ "DecodeImg",
"Resize",
"RandomResize",
"ResizeByShort",
@@ -50,19 +50,19 @@
"RandomCrop",
"RandomScaleAspect",
"RandomExpand",
- "Padding",
+ "Pad",
"MixupImage",
"RandomDistort",
"RandomBlur",
"RandomSwap",
- "Defogging",
- "DimReducing",
- "BandSelecting",
+ "Dehaze",
+ "ReduceDim",
+ "SelectBand",
"ArrangeSegmenter",
"ArrangeChangeDetector",
"ArrangeClassifier",
"ArrangeDetector",
- "RandomFlipOrRotation",
+ "RandomFlipOrRotate",
]
interp_dict = {
@@ -119,19 +119,31 @@ def __call__(self, sample):
return sample
-class ImgDecoder(Transform):
+class DecodeImg(Transform):
"""
Decode image(s) in input.
+
Args:
- to_rgb (bool, optional): If True, convert input images from BGR format to RGB format. Defaults to True.
+ to_rgb (bool, optional): If True, convert input image(s) from BGR format to RGB format. Defaults to True.
+ to_uint8 (bool, optional): If True, quantize and convert decoded image(s) to uint8 type. Defaults to True.
+ decode_bgr (bool, optional): If True, automatically interpret a non-geo image (e.g., jpeg images) as a BGR image.
+ Defaults to True.
+ decode_sar (bool, optional): If True, automatically interpret a two-dimensional (single-band) geo image (e.g.
+ geotiff images) as a SAR image. Defaults to True.
"""
- def __init__(self, to_rgb=True, to_uint8=True):
- super(ImgDecoder, self).__init__()
+ def __init__(self,
+ to_rgb=True,
+ to_uint8=True,
+ decode_bgr=True,
+ decode_sar=True):
+ super(DecodeImg, self).__init__()
self.to_rgb = to_rgb
self.to_uint8 = to_uint8
+ self.decode_bgr = decode_bgr
+ self.decode_sar = decode_sar
- def read_img(self, img_path, input_channel=3):
+ def read_img(self, img_path):
img_format = imghdr.what(img_path)
name, ext = os.path.splitext(img_path)
if img_format == 'tiff' or ext == '.img':
@@ -140,24 +152,24 @@ def read_img(self, img_path, input_channel=3):
except:
try:
from osgeo import gdal
- except:
- raise Exception(
- "Failed to import gdal! You can try use conda to install gdal"
+ except ImportError:
+ raise ImportError(
+ "Failed to import gdal! Please install GDAL library according to the document."
)
- six.reraise(*sys.exc_info())
dataset = gdal.Open(img_path)
if dataset == None:
- raise Exception('Can not open', img_path)
+ raise IOError('Cannot open', img_path)
im_data = dataset.ReadAsArray()
- if im_data.ndim == 2:
+ if im_data.ndim == 2 and self.decode_sar:
im_data = to_intensity(im_data) # read as a SAR image
im_data = im_data[:, :, np.newaxis]
- elif im_data.ndim == 3:
- im_data = im_data.transpose((1, 2, 0))
+ else:
+ if im_data.ndim == 3:
+ im_data = im_data.transpose((1, 2, 0))
return im_data
elif img_format in ['jpeg', 'bmp', 'png', 'jpg']:
- if input_channel == 3:
+ if self.decode_bgr:
return cv2.imread(img_path, cv2.IMREAD_ANYDEPTH |
cv2.IMREAD_ANYCOLOR | cv2.IMREAD_COLOR)
else:
@@ -166,7 +178,7 @@ def read_img(self, img_path, input_channel=3):
elif ext == '.npy':
return np.load(img_path)
else:
- raise Exception('Image format {} is not supported!'.format(ext))
+ raise TypeError('Image format {} is not supported!'.format(ext))
def apply_im(self, im_path):
if isinstance(im_path, str):
@@ -192,7 +204,7 @@ def apply_mask(self, mask):
except:
raise ValueError("Cannot read the mask file {}!".format(mask))
if len(mask.shape) != 2:
- raise Exception(
+ raise ValueError(
"Mask should be a 1-channel image, but recevied is a {}-channel image.".
format(mask.shape[2]))
return mask
@@ -201,6 +213,7 @@ def apply(self, sample):
"""
Args:
sample (dict): Input sample.
+
Returns:
dict: Decoded sample.
"""
@@ -218,8 +231,8 @@ def apply(self, sample):
im_height, im_width, _ = sample['image'].shape
se_height, se_width = sample['mask'].shape
if im_height != se_height or im_width != se_width:
- raise Exception(
- "The height or width of the im is not same as the mask")
+ raise ValueError(
+ "The height or width of the image is not same as the mask.")
if 'aux_masks' in sample:
sample['aux_masks'] = list(
map(self.apply_mask, sample['aux_masks']))
@@ -237,7 +250,7 @@ class Compose(Transform):
All input images are in Height-Width-Channel ([H, W, C]) format.
Args:
- transforms (List[paddlers.transforms.Transform]): List of data preprocess or augmentations.
+ transforms (list[paddlers.transforms.Transform]): List of data preprocessing or augmentation operators.
Raises:
TypeError: Invalid type of transforms.
ValueError: Invalid length of transforms.
@@ -247,14 +260,14 @@ def __init__(self, transforms, to_uint8=True):
super(Compose, self).__init__()
if not isinstance(transforms, list):
raise TypeError(
- 'Type of transforms is invalid. Must be List, but received is {}'
+ 'Type of transforms is invalid. Must be a list, but received {}'
.format(type(transforms)))
if len(transforms) < 1:
raise ValueError(
'Length of transforms must not be less than 1, but received {}'
.format(len(transforms)))
self.transforms = transforms
- self.decode_image = ImgDecoder(to_uint8=to_uint8)
+ self.decode_image = DecodeImg(to_uint8=to_uint8)
self.arrange_outputs = None
self.apply_im_only = False
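
A minimal construction sketch in the style of the docstring example further below (argument values are illustrative, not defaults):

import paddlers.transforms as T

train_transforms = T.Compose([
    T.Resize(target_size=256),
    T.Normalize(),
])
# Note that Compose prepends its own DecodeImg(to_uint8=...) step, per the __init__ above.
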
@@ -295,7 +308,7 @@ class Resize(Transform):
Attention: If interp is 'RANDOM', the interpolation method will be chosen randomly.
Args:
- target_size (int, List[int] or Tuple[int]): Target size. If int, the height and width share the same target_size.
+ target_size (int, list[int] | tuple[int]): Target size. If int, the height and width share the same target_size.
Otherwise, target_size represents [target height, target width].
interp ({'NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM'}, optional):
Interpolation method of resize. Defaults to 'LINEAR'.
@@ -414,7 +427,7 @@ class RandomResize(Transform):
Attention: If interp is 'RANDOM', the interpolation method will be chosen randomly.
Args:
- target_sizes (List[int], List[list or tuple] or Tuple[list or tuple]):
+ target_sizes (list[int] | list[list | tuple] | tuple[list | tuple]):
Multiple target sizes, each target size is an int or list/tuple.
interp ({'NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM'}, optional):
Interpolation method of resize. Defaults to 'LINEAR'.
@@ -434,7 +447,7 @@ def __init__(self, target_sizes, interp='LINEAR'):
interp_dict.keys()))
self.interp = interp
assert isinstance(target_sizes, list), \
- "target_size must be List"
+ "target_size must be a list."
for i, item in enumerate(target_sizes):
if isinstance(item, int):
target_sizes[i] = (item, item)
@@ -494,7 +507,7 @@ class RandomResizeByShort(Transform):
Attention: If interp is 'RANDOM', the interpolation method will be chosen randomly.
Args:
- short_sizes (List[int]): Target size of the shorter side of the image(s).
+ short_sizes (list[int]): Target size of the shorter side of the image(s).
max_size (int, optional): The upper bound of the longer side of the image(s). If max_size is -1, no upper bound is applied. Defaults to -1.
interp ({'NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM'}, optional): Interpolation method of resize. Defaults to 'LINEAR'.
@@ -513,7 +526,7 @@ def __init__(self, short_sizes, max_size=-1, interp='LINEAR'):
interp_dict.keys()))
self.interp = interp
assert isinstance(short_sizes, list), \
- "short_sizes must be List"
+ "short_sizes must be a list."
self.short_sizes = short_sizes
self.max_size = max_size
@@ -544,7 +557,7 @@ def apply(self, sample):
return sample
-class RandomFlipOrRotation(Transform):
+class RandomFlipOrRotate(Transform):
"""
Flip or Rotate an image in different ways with a certain probability.
@@ -561,7 +574,7 @@ class RandomFlipOrRotation(Transform):
# Define data augmentation transforms
train_transforms = T.Compose([
- T.RandomFlipOrRotation(
+ T.RandomFlipOrRotate(
probs = [0.3, 0.2] # The probability of applying flip is 0.3 and of applying rotate is 0.2; the probability of no change is 0.5
probsf = [0.3, 0.25, 0, 0, 0] # Within flip, the probabilities of horizontal and vertical flips are 0.3 and 0.25; horizontal-and-vertical, diagonal, and anti-diagonal flips all have probability 0; the probability of no change is 0.45
probsr = [0, 0.65, 0]), # Within rotate, the probabilities of rotating 90, 180, and 270 degrees clockwise are 0, 0.65, and 0; the probability of no change is 0.35
@@ -574,7 +587,7 @@ def __init__(self,
probs=[0.35, 0.25],
probsf=[0.3, 0.3, 0.2, 0.1, 0.1],
probsr=[0.25, 0.5, 0.25]):
- super(RandomFlipOrRotation, self).__init__()
+ super(RandomFlipOrRotate, self).__init__()
# Convert the various probabilities into cumulative probability intervals, used to decide which flip or rotation mode to apply
self.probs = [probs[0], probs[0] + probs[1]]
self.probsf = self.get_probs_range(probsf)
@@ -594,6 +607,16 @@ def apply_mask(self, mask, mode_id, flip_mode=True):
mask = img_simple_rotate(mask, mode_id)
return mask
+ def apply_bbox(self, bbox, mode_id, flip_mode=True):
+ raise TypeError(
+ "Currently, `paddlers.transforms.RandomFlipOrRotate` is not available for object detection tasks."
+ )
+
+ def apply_segm(self, segms, mode_id, flip_mode=True):
+ raise TypeError(
+ "Currently, `paddlers.transforms.RandomFlipOrRotate` is not available for object detection tasks."
+ )
+
def get_probs_range(self, probs):
'''
Change various probabilities into cumulative probabilities
@@ -637,14 +660,43 @@ def apply(self, sample):
mode_p = random.random()
mode_id = self.judge_probs_range(mode_p, self.probsf)
sample['image'] = self.apply_im(sample['image'], mode_id, True)
+ if 'image2' in sample:
+ sample['image2'] = self.apply_im(sample['image2'], mode_id,
+ True)
if 'mask' in sample:
sample['mask'] = self.apply_mask(sample['mask'], mode_id, True)
+ if 'aux_masks' in sample:
+ sample['aux_masks'] = [
+ self.apply_mask(aux_mask, mode_id, True)
+ for aux_mask in sample['aux_masks']
+ ]
+ if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
+ sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], mode_id,
+ True)
+ if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
+ sample['gt_poly'] = self.apply_segm(sample['gt_poly'], mode_id,
+ True)
elif p_m < self.probs[1]:
mode_p = random.random()
mode_id = self.judge_probs_range(mode_p, self.probsr)
sample['image'] = self.apply_im(sample['image'], mode_id, False)
+ if 'image2' in sample:
+ sample['image2'] = self.apply_im(sample['image2'], mode_id,
+ False)
if 'mask' in sample:
sample['mask'] = self.apply_mask(sample['mask'], mode_id, False)
+ if 'aux_masks' in sample:
+ sample['aux_masks'] = [
+ self.apply_mask(aux_mask, mode_id, False)
+ for aux_mask in sample['aux_masks']
+ ]
+ if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0:
+ sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], mode_id,
+ False)
+ if 'gt_poly' in sample and len(sample['gt_poly']) > 0:
+ sample['gt_poly'] = self.apply_segm(sample['gt_poly'], mode_id,
+ False)
+
return sample
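
The probability machinery above is easiest to see in isolation. Below is a standalone sketch of the cumulative-interval trick; it is assumed to be equivalent in spirit to get_probs_range/judge_probs_range, not a copy of the library's helpers:

import random

def to_intervals(probs):
    # e.g. [0.3, 0.25] -> [(0.0, 0.3), (0.3, 0.55)]; a draw >= 0.55 means "no change"
    bounds, cum = [], 0.0
    for p in probs:
        bounds.append((cum, cum + p))
        cum += p
    return bounds

def pick_mode(p, bounds):
    for mode_id, (lo, hi) in enumerate(bounds):
        if lo <= p < hi:
            return mode_id
    return -1  # outside all intervals: leave the sample unchanged

mode = pick_mode(random.random(), to_intervals([0.3, 0.25]))
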
@@ -766,16 +818,16 @@ def apply(self, sample):
class Normalize(Transform):
"""
- Apply min-max normalization to the image(s) in input.
+ Apply normalization to the input image(s). The normalization steps are:
1. im = (im - min_value) / (max_value - min_value)
2. im = im - mean
3. im = im / std
Args:
- mean(List[float] or Tuple[float], optional): Mean of input image(s). Defaults to [0.485, 0.456, 0.406].
- std(List[float] or Tuple[float], optional): Standard deviation of input image(s). Defaults to [0.229, 0.224, 0.225].
- min_val(List[float] or Tuple[float], optional): Minimum value of input image(s). Defaults to [0, 0, 0, ].
- max_val(List[float] or Tuple[float], optional): Max value of input image(s). Defaults to [255., 255., 255.].
+ mean(list[float] | tuple[float], optional): Mean of input image(s). Defaults to [0.485, 0.456, 0.406].
+ std(list[float] | tuple[float], optional): Standard deviation of input image(s). Defaults to [0.229, 0.224, 0.225].
+ min_val(list[float] | tuple[float], optional): Minimum value of input image(s). Defaults to [0, 0, 0].
+ max_val(list[float] | tuple[float], optional): Max value of input image(s). Defaults to [255., 255., 255.].
"""
def __init__(self,
@@ -865,12 +917,12 @@ class RandomCrop(Transform):
4. Resize the cropped area to crop_size by crop_size.
Args:
- crop_size(int, List[int] or Tuple[int]): Target size of the cropped area. If None, the cropped area will not be
+ crop_size(int, list[int] | tuple[int]): Target size of the cropped area. If None, the cropped area will not be
resized. Defaults to None.
- aspect_ratio (List[float], optional): Aspect ratio of cropped region in [min, max] format. Defaults to [.5, 2.].
- thresholds (List[float], optional): Iou thresholds to decide a valid bbox crop.
+ aspect_ratio (list[float], optional): Aspect ratio of cropped region in [min, max] format. Defaults to [.5, 2.].
+ thresholds (list[float], optional): IoU thresholds to decide a valid bbox crop.
Defaults to [.0, .1, .3, .5, .7, .9].
- scaling (List[float], optional): Ratio between the cropped region and the original image in [min, max] format.
+ scaling (list[float], optional): Ratio between the cropped region and the original image in [min, max] format.
Defaults to [.3, 1.].
num_attempts (int, optional): The number of tries before giving up. Defaults to 50.
allow_no_crop (bool, optional): Whether returning without doing crop is allowed. Defaults to True.
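
A construction sketch that simply spells out the documented defaults (the crop_size value is illustrative):

import paddlers.transforms as T

crop = T.RandomCrop(
    crop_size=512,
    aspect_ratio=[.5, 2.],
    thresholds=[.0, .1, .3, .5, .7, .9],
    scaling=[.3, 1.],
    num_attempts=50,
    allow_no_crop=True)
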
@@ -1088,11 +1140,11 @@ class RandomExpand(Transform):
Args:
upper_ratio(float, optional): The maximum ratio to which the original image is expanded. Defaults to 4..
prob(float, optional): The probability of applying expansion. Defaults to .5.
- im_padding_value(List[float] or Tuple[float], optional): RGB filling value for the image. Defaults to (127.5, 127.5, 127.5).
+ im_padding_value(list[float] | tuple[float], optional): RGB filling value for the image. Defaults to (127.5, 127.5, 127.5).
label_padding_value(int, optional): Filling value for the mask. Defaults to 255.
See Also:
- paddlers.transforms.Padding
+ paddlers.transforms.Pad
"""
def __init__(self,
@@ -1120,7 +1172,7 @@ def apply(self, sample):
x = np.random.randint(0, w - im_w)
target_size = (h, w)
offsets = (x, y)
- sample = Padding(
+ sample = Pad(
target_size=target_size,
pad_mode=-1,
offsets=offsets,
@@ -1129,7 +1181,7 @@ def apply(self, sample):
return sample
-class Padding(Transform):
+class Pad(Transform):
def __init__(self,
target_size=None,
pad_mode=0,
@@ -1148,7 +1200,7 @@ def __init__(self,
label_padding_value(int, optional): Filling value for the mask. Defaults to 255.
size_divisor(int): Image width and height after padding will be a multiple of size_divisor.
"""
- super(Padding, self).__init__()
+ super(Pad, self).__init__()
if isinstance(target_size, (list, tuple)):
if len(target_size) != 2:
raise ValueError(
@@ -1525,20 +1577,20 @@ def apply(self, sample):
return sample
-class Defogging(Transform):
+class Dehaze(Transform):
"""
- Defog input image(s).
+ Dehaze input image(s).
Args:
gamma (bool, optional): Use gamma correction or not. Defaults to False.
"""
def __init__(self, gamma=False):
- super(Defogging, self).__init__()
+ super(Dehaze, self).__init__()
self.gamma = gamma
def apply_im(self, image):
- image = de_haze(image, self.gamma)
+ image = dehaze(image, self.gamma)
return image
def apply(self, sample):
@@ -1548,19 +1600,20 @@ def apply(self, sample):
return sample
-class DimReducing(Transform):
+class ReduceDim(Transform):
"""
- Use PCA to reduce input image(s) dimension.
+ Use PCA to reduce the dimension of input image(s).
Args:
- joblib_path (str): Path of *.joblib about PCA.
+ joblib_path (str): Path of the *.joblib file that stores the fitted PCA model.
"""
def __init__(self, joblib_path):
- super(DimReducing, self).__init__()
+ super(ReduceDim, self).__init__()
ext = joblib_path.split(".")[-1]
if ext != "joblib":
- raise ValueError("`joblib_path` must be *.joblib, not *.{}.".format(ext))
+ raise ValueError("`joblib_path` must be *.joblib, not *.{}.".format(
+ ext))
self.pca = load(joblib_path)
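
The *.joblib file itself is produced elsewhere. A hypothetical sketch of creating one, assuming the PCA model is fitted with scikit-learn and saved with joblib (consistent with the load() call above; the data is random placeholder pixels):

import numpy as np
from joblib import dump
from sklearn.decomposition import PCA

pixels = np.random.rand(10000, 10)  # (num_pixels, num_bands) training pixels
dump(PCA(n_components=3).fit(pixels), 'pca.joblib')
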
def apply_im(self, image):
@@ -1577,16 +1630,16 @@ def apply(self, sample):
return sample
-class BandSelecting(Transform):
+class SelectBand(Transform):
"""
- Select the band of the input image(s).
+ Select a set of bands of input image(s).
Args:
- band_list (list, optional): Bands of selected (Start with 1). Defaults to [1, 2, 3].
+ band_list (list, optional): Bands to select (band indices start from 1). Defaults to [1, 2, 3].
"""
def __init__(self, band_list=[1, 2, 3]):
- super(BandSelecting, self).__init__()
+ super(SelectBand, self).__init__()
self.band_list = band_list
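
For example, picking three bands out of a multispectral input (band semantics depend on the sensor; the indices below are illustrative):

import paddlers.transforms as T

select = T.SelectBand(band_list=[4, 3, 2])  # 1-based band indices, per the docstring
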
def apply_im(self, image):
diff --git a/paddlers/utils/__init__.py b/paddlers/utils/__init__.py
index 832793d1..842e5331 100644
--- a/paddlers/utils/__init__.py
+++ b/paddlers/utils/__init__.py
@@ -21,4 +21,4 @@
from .env import get_environ_info, get_num_workers, init_parallel_env
from .download import download_and_decompress, decompress
from .stats import SmoothedValue, TrainingStats
-from .shm import _get_shared_memory_size_in_M
\ No newline at end of file
+from .shm import _get_shared_memory_size_in_M
diff --git a/paddlers/utils/stats.py b/paddlers/utils/stats.py
index 7b4f09a7..447753fa 100644
--- a/paddlers/utils/stats.py
+++ b/paddlers/utils/stats.py
@@ -17,6 +17,7 @@
import numpy as np
+
class SmoothedValue(object):
"""Track a series of values and provide access to smoothed values over a
window.
diff --git a/requirements.txt b/requirements.txt
index 1cd4bfde..706bb439 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -18,5 +18,6 @@ munch
natsort
geojson
colorama
+filelock
# # Self installation
# GDAL >= 3.1.3
diff --git a/tests/check_coverage.sh b/tests/check_coverage.sh
new file mode 100644
index 00000000..7d476c91
--- /dev/null
+++ b/tests/check_coverage.sh
@@ -0,0 +1,6 @@
+#!/usr/bin/env bash
+
+bash download_test_data.sh
+coverage run --source paddlers,$(ls -d ../tools/* | tr '\n' ',') --omit=../paddlers/models/* -m unittest discover -v
+coverage report
+coverage html -d coverage_html
\ No newline at end of file
diff --git a/tests/components/__init__.py b/tests/components/__init__.py
new file mode 100644
index 00000000..29c8b7d1
--- /dev/null
+++ b/tests/components/__init__.py
@@ -0,0 +1,13 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/tests/data/.gitignore b/tests/data/.gitignore
new file mode 100644
index 00000000..bd590fd8
--- /dev/null
+++ b/tests/data/.gitignore
@@ -0,0 +1,2 @@
+*.tar.gz
+*/
\ No newline at end of file
diff --git a/tests/data/README.md b/tests/data/README.md
new file mode 100644
index 00000000..94ce141c
--- /dev/null
+++ b/tests/data/README.md
@@ -0,0 +1,5 @@
+# Testing Data
+
+This directory stores real samples that can be used for testing.
+
+*ssmt* means single-source-multi-temporal and *ssst* means single-source-single-temporal.
diff --git a/tests/data/__init__.py b/tests/data/__init__.py
new file mode 100644
index 00000000..c9b33527
--- /dev/null
+++ b/tests/data/__init__.py
@@ -0,0 +1,15 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .data_utils import *
diff --git a/tests/data/data_utils.py b/tests/data/data_utils.py
new file mode 100644
index 00000000..b0d421ba
--- /dev/null
+++ b/tests/data/data_utils.py
@@ -0,0 +1,378 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os.path as osp
+import re
+import imghdr
+import platform
+from collections import OrderedDict
+from functools import partial, wraps
+
+import numpy as np
+
+__all__ = ['build_input_from_file']
+
+
+def norm_path(path):
+ win_sep = "\\"
+ other_sep = "/"
+ if platform.system() == "Windows":
+ path = win_sep.join(path.split(other_sep))
+ else:
+ path = other_sep.join(path.split(win_sep))
+ return path
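+ # e.g., on a POSIX system (illustrative): norm_path("images\\train\\1.png") -> "images/train/1.png"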
+
+
+def is_pic(im_path):
+ valid_suffix = [
+ 'JPEG', 'jpeg', 'JPG', 'jpg', 'BMP', 'bmp', 'PNG', 'png', 'npy'
+ ]
+ suffix = im_path.split('.')[-1]
+ if suffix in valid_suffix:
+ return True
+ im_format = imghdr.what(im_path)
+ _, ext = osp.splitext(im_path)
+ if im_format == 'tiff' or ext == '.img':
+ return True
+ return False
+
+
+def get_full_path(p, prefix=''):
+ p = norm_path(p)
+ return osp.join(prefix, p)
+
+
+def silent(func):
+ def _do_nothing(*args, **kwargs):
+ pass
+
+ @wraps(func)
+ def _wrapper(*args, **kwargs):
+ import builtins
+ print = builtins.print
+ builtins.print = _do_nothing
+ ret = func(*args, **kwargs)
+ builtins.print = print
+ return ret
+
+ return _wrapper
+
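+# Example (hypothetical): silence a chatty loader's print calls.
+#
+# @silent
+# def verbose_loader(path):
+#     print("loading", path)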
+
+class ConstrSample(object):
+ def __init__(self, prefix, label_list):
+ super().__init__()
+ self.prefix = prefix
+ self.label_list_obj = self.read_label_list(label_list)
+ self.get_full_path = partial(get_full_path, prefix=self.prefix)
+
+ def read_label_list(self, label_list):
+ if label_list is None:
+ return None
+ cname2cid = OrderedDict()
+ label_id = 0
+ with open(label_list, 'r') as f:
+ for line in f:
+ cname2cid[line.strip()] = label_id
+ label_id += 1
+ return cname2cid
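+ # e.g., a label_list file containing the lines "building" and "water" yields
+ # OrderedDict([('building', 0), ('water', 1)]) (assumes one label per line).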
+
+ def __call__(self, *parts):
+ raise NotImplementedError
+
+
+class ConstrSegSample(ConstrSample):
+ def __call__(self, im_path, mask_path):
+ return {
+ 'image': self.get_full_path(im_path),
+ 'mask': self.get_full_path(mask_path)
+ }
+
+
+class ConstrCdSample(ConstrSample):
+ def __call__(self, im1_path, im2_path, mask_path, *aux_mask_paths):
+ sample = {
+ 'image_t1': self.get_full_path(im1_path),
+ 'image_t2': self.get_full_path(im2_path),
+ 'mask': self.get_full_path(mask_path)
+ }
+ if len(aux_mask_paths) > 0:
+ sample['aux_masks'] = [
+ self.get_full_path(p) for p in aux_mask_paths
+ ]
+ return sample
+
+
+class ConstrClasSample(ConstrSample):
+ def __call__(self, im_path, label):
+ return {'image': self.get_full_path(im_path), 'label': int(label)}
+
+
+class ConstrDetSample(ConstrSample):
+ def __init__(self, prefix, label_list):
+ super().__init__(prefix, label_list)
+ self.ct = 0
+
+ def __call__(self, im_path, ann_path):
+ im_path = self.get_full_path(im_path)
+ ann_path = self.get_full_path(ann_path)
+ # TODO: Precisely recognize the annotation format
+ if ann_path.endswith('.json'):
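+ # For COCO-style annotations, the first field is treated as the image
+ # directory rather than a single image path (per the call below).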
+ im_dir = im_path
+ return self._parse_coco_files(im_dir, ann_path)
+ elif ann_path.endswith('.xml'):
+ return self._parse_voc_files(im_path, ann_path)
+ else:
+ raise ValueError("Cannot recognize the annotation format")
+
+ def _parse_voc_files(self, im_path, ann_path):
+ import xml.etree.ElementTree as ET
+
+ cname2cid = self.label_list_obj
+ tree = ET.parse(ann_path)
+ # If the XML file does not contain an id element, fall back to a running counter.
+ if tree.find('id') is None:
+ im_id = np.asarray([self.ct])
+ else:
+ self.ct = int(tree.find('id').text)
+ im_id = np.asarray([int(tree.find('id').text)])
+ pattern = re.compile('<size>', re.IGNORECASE)
+ size_tag = pattern.findall(str(ET.tostringlist(tree.getroot())))
+ if len(size_tag) > 0:
+ size_tag = size_tag[0][1:-1]
+ size_element = tree.find(size_tag)
+ pattern = re.compile('<width>', re.IGNORECASE)
+ width_tag = pattern.findall(str(ET.tostringlist(size_element)))[0][
+ 1:-1]
+ im_w = float(size_element.find(width_tag).text)
+ pattern = re.compile('<height>', re.IGNORECASE)
+ height_tag = pattern.findall(str(ET.tostringlist(size_element)))[0][
+ 1:-1]
+ im_h = float(size_element.find(height_tag).text)
+ else:
+ im_w = 0
+ im_h = 0
+
+ pattern = re.compile('<object>', re.IGNORECASE)