diff --git a/.github/workflows/build_and_test.yaml b/.github/workflows/build_and_test.yaml new file mode 100644 index 00000000..339634d1 --- /dev/null +++ b/.github/workflows/build_and_test.yaml @@ -0,0 +1,81 @@ +name: build and test + +on: + push: + branches: + - develop + - "tests/**" + paths-ignore: + - "docs/**" + - "README.md" + pull_request: + branches: + - develop + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + build_and_test_cpu: + runs-on: ${{ matrix.os }} + strategy: + matrix: + os: [ubuntu-latest, windows-latest] + python-version: ["3.7", "3.8"] + include: + - python-version: "3.7" + os: windows-latest + gdal-whl-url: https://download.lfd.uci.edu/pythonlibs/archived/cp37/GDAL-3.3.3-cp37-cp37m-win_amd64.whl + - python-version: "3.7" + os: ubuntu-latest + gdal-whl-url: https://versaweb.dl.sourceforge.net/project/gdal-wheels-for-linux/GDAL-3.4.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl + - python-version: "3.8" + os: windows-latest + gdal-whl-url: https://download.lfd.uci.edu/pythonlibs/archived/GDAL-3.3.3-cp38-cp38-win_amd64.whl + - python-version: "3.8" + os: ubuntu-latest + gdal-whl-url: https://versaweb.dl.sourceforge.net/project/gdal-wheels-for-linux/GDAL-3.4.1-cp38-cp38-manylinux_2_5_x86_64.manylinux1_x86_64.whl + fail-fast: false + steps: + - uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python-version }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python-version }} + - name: Upgrade pip + run: python -m pip install pip --upgrade --user + - name: Install PaddlePaddle + run: python -m pip install paddlepaddle==2.3.1 -i https://mirror.baidu.com/pypi/simple + - name: Install PaddleRS + run: | + python -m pip install -r requirements.txt + python -m pip install -e . + - name: Install GDAL + run: python -m pip install ${{ matrix.gdal-whl-url }} + - name: Run unittests + run: | + cd tests + bash run_fast_tests.sh + shell: bash + + build_and_test_cuda102: + runs-on: ubuntu-18.04 + container: + image: registry.baidubce.com/paddlepaddle/paddle:2.3.1-gpu-cuda10.2-cudnn7 + steps: + - uses: actions/checkout@v3 + - name: Upgrade pip + run: python3.7 -m pip install pip --upgrade --user + - name: Install PaddleRS + run: | + python3.7 -m pip install -r requirements.txt + python3.7 -m pip install -e . + - name: Install GDAL + run: python3.7 -m pip install https://versaweb.dl.sourceforge.net/project/gdal-wheels-for-linux/GDAL-3.4.1-cp37-cp37m-manylinux_2_5_x86_64.manylinux1_x86_64.whl + # Do not run unittests, because there is NO GPU in the machine. 
+ # - name: Run unittests + # run: | + # cd tests + # bash run_fast_tests.sh + # shell: bash \ No newline at end of file diff --git a/.github/workflows/lint.yaml b/.github/workflows/lint.yaml new file mode 100644 index 00000000..939add7f --- /dev/null +++ b/.github/workflows/lint.yaml @@ -0,0 +1,25 @@ +name: lint + +on: [push, pull_request] + +concurrency: + group: ${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: true + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Set up Python 3.7 + uses: actions/setup-python@v4 + with: + python-version: 3.7 + - name: Upgrade pip + run: python -m pip install pip --upgrade --user + - name: Install pre-commit hooks + run: | + pip install pre-commit + pre-commit install + - name: Lint + run: pre-commit run --all-files \ No newline at end of file diff --git a/README.md b/README.md index 3626156d..193217a6 100644 --- a/README.md +++ b/README.md @@ -6,9 +6,9 @@ **基于飞桨框架开发的高性能遥感图像处理开发套件,端到端地完成从训练到部署的全流程遥感深度学习应用。** - - - [![License](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE) + + [![license](https://img.shields.io/badge/license-Apache%202-blue.svg)](LICENSE) + [![build status](https://github.com/PaddleCV-SIG/PaddleRS/workflows/build_and_test.yaml/badge.svg?branch=develop)](https://github.com/PaddleCV-SIG/PaddleRS/actions) ![python version](https://img.shields.io/badge/python-3.7+-orange.svg) ![support os](https://img.shields.io/badge/os-linux%2C%20win%2C%20mac-yellow.svg) @@ -92,7 +92,7 @@ PaddleRS是遥感科研院所、相关高校共同基于飞桨开发的遥感处
  • ResizeByShort
  • RandomResizeByShort
  • ResizeByLong
  • - RandomFlipOrRotation
  • + RandomFlipOrRotate
  • RandomHorizontalFlip
  • RandomVerticalFlip
  • Normalize
  • @@ -100,13 +100,13 @@ PaddleRS是遥感科研院所、相关高校共同基于飞桨开发的遥感处
  • RandomCrop
  • RandomScaleAspect
  • RandomExpand
  • - Padding
  • + Pad
  • MixupImage
  • RandomDistort
  • RandomBlur
  • - Defogging
  • - DimReducing
  • - BandSelecting
  • + Dehaze
  • + ReduceDim
  • + SelectBand
  • RandomSwap
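The hunk above tracks several operator renames in this PR (RandomFlipOrRotation -> RandomFlipOrRotate, Padding -> Pad, Defogging -> Dehaze, DimReducing -> ReduceDim, BandSelecting -> SelectBand; ImgDecoder -> DecodeImg appears later in the diff). A minimal usage sketch of the renamed API follows: the class names come from this diff, while every constructor argument is an illustrative assumption, not taken from the PR.

```python
# Sketch only: class names follow this PR's renames; all constructor
# arguments below are assumptions for illustration, not from the PR.
import paddlers.transforms as T

train_transforms = T.Compose([
    T.SelectBand([1, 2, 3]),  # formerly BandSelecting; band indices assumed
    T.Dehaze(),               # formerly Defogging
    T.RandomFlipOrRotate(),   # formerly RandomFlipOrRotation
    T.Pad(target_size=256),   # formerly Padding; target size assumed
])

# Decoding now goes through DecodeImg (formerly ImgDecoder), which the
# dataset code in this diff invokes directly: DecodeImg(to_rgb=False)(sample)
```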
  • @@ -223,4 +223,3 @@ PaddleRS是遥感科研院所、相关高校共同基于飞桨开发的遥感处 year={2022} } ``` - diff --git a/deploy/export/README.md b/deploy/export/README.md index ea03a4b1..d3e2eb7a 100644 --- a/deploy/export/README.md +++ b/deploy/export/README.md @@ -60,4 +60,3 @@ python deploy/export_model.py --model_dir=./output/deeplabv3p/best_model/ --save - 对于检测模型中的YOLO/PPYOLO系列模型,请保证输入影像的`w`和`h`有相同取值、且均为32的倍数;指定`--fixed_input_shape`时,R-CNN模型的`w`和`h`也均需为32的倍数。 - 指定`[w,h]`时,请使用半角逗号(`,`)分隔`w`和`h`,二者之间不允许存在空格等其它字符。 - 将`w`和`h`设得越大,则模型在推理过程中的耗时和内存/显存占用越高。不过,如果`w`和`h`过小,则可能对模型的精度存在较大负面影响。 -- 对于变化检测模型BIT,请保证指定`--fixed_input_shape`,并且数值不包含负数,因为BIT用到空间注意力,需要从tensor中获取`b,c,h,w`的属性,若为负数则报错。 diff --git a/deploy/export/export_model.py b/deploy/export/export_model.py index 0fc3c9d1..b43c7fc2 100644 --- a/deploy/export/export_model.py +++ b/deploy/export/export_model.py @@ -21,9 +21,23 @@ def get_parser(): parser = argparse.ArgumentParser() - parser.add_argument('--model_dir', '-m', type=str, default=None, help='model directory path') - parser.add_argument('--save_dir', '-s', type=str, default=None, help='path to save inference model') - parser.add_argument('--fixed_input_shape', '-fs', type=str, default=None, + parser.add_argument( + '--model_dir', + '-m', + type=str, + default=None, + help='model directory path') + parser.add_argument( + '--save_dir', + '-s', + type=str, + default=None, + help='path to save inference model') + parser.add_argument( + '--fixed_input_shape', + '-fs', + type=str, + default=None, help="export inference model with fixed input shape: [w,h] or [n,c,w,h]") return parser @@ -39,13 +53,17 @@ def get_parser(): fixed_input_shape = literal_eval(args.fixed_input_shape) # Check validaty if not isinstance(fixed_input_shape, list): - raise ValueError("fixed_input_shape should be of None or list type.") + raise ValueError( + "fixed_input_shape should be of None or list type.") if len(fixed_input_shape) not in (2, 4): - raise ValueError("fixed_input_shape contains an incorrect number of elements.") + raise ValueError( + "fixed_input_shape contains an incorrect number of elements.") if fixed_input_shape[-1] <= 0 or fixed_input_shape[-2] <= 0: - raise ValueError("the input width and height must be positive integers.") - if len(fixed_input_shape)==4 and fixed_input_shape[1] <= 0: - raise ValueError("the number of input channels must be a positive integer.") + raise ValueError( + "Input width and height must be positive integers.") + if len(fixed_input_shape) == 4 and fixed_input_shape[1] <= 0: + raise ValueError( + "The number of input channels must be a positive integer.") # Set environment variables os.environ['PADDLEX_EXPORT_STAGE'] = 'True' @@ -56,4 +74,4 @@ def get_parser(): # Do dynamic-to-static cast # XXX: Invoke a protected (single underscore) method outside of subclasses. 
- model._export_inference_model(args.save_dir, fixed_input_shape) \ No newline at end of file + model._export_inference_model(args.save_dir, fixed_input_shape) diff --git a/docs/README.md b/docs/README.md index 2479f0fd..f2cd104d 100644 --- a/docs/README.md +++ b/docs/README.md @@ -1,5 +1,3 @@ -PaddleSeg commit fec42fd869b6f796c74cd510671595e3512bc8e9 - # 开发规范 请注意,paddlers/models/ppxxx系列除了修改import路径和支持多通道模型外,不要增删改任何代码。 -新增的模型需放在paddlers/models/下的seg、det、cls、cd目录下。 \ No newline at end of file +新增的模型需放在paddlers/models/下的seg、det、cls、cd目录下。 diff --git a/docs/apis/model_zoo.md b/docs/apis/model_zoo.md index 45570b21..d7c16932 100644 --- a/docs/apis/model_zoo.md +++ b/docs/apis/model_zoo.md @@ -4,18 +4,18 @@ PaddleRS的基础模型库来自[PaddleClas](https://github.com/PaddlePaddle/Pad ## 自定义模型库 -| 模型名称 | 用途 | -| --------------- | -------- | +| 模型名称 | 用途 | +| --------------- | -------- | | FarSeg | 语义分割 | | BIT | 变化检测 | | CDNet | 变化检测 | | DSIFN | 变化检测 | | STANet | 变化检测 | -| SNUNet | 变化检测 | +| SNUNet | 变化检测 | | DSAMNet | 变化检测 | -| FCEarlyFusion | 变化检测 | -| FCSiamConc | 变化检测 | -| FCSiamDiff | 变化检测 | +| FCEarlyFusion | 变化检测 | +| FCSiamConc | 变化检测 | +| FCSiamDiff | 变化检测 | ## 如何导入 diff --git a/docs/apis/transforms.md b/docs/apis/transforms.md index f05e9d26..9721ec7d 100644 --- a/docs/apis/transforms.md +++ b/docs/apis/transforms.md @@ -1,6 +1,6 @@ # 数据增强 -PaddleRS将多种任务需要的数据增强进行了有机整合,均通过`Compose`进行使用,数据读取方面通过`ImgDecoder`可以对不只三通道RGB图像进行读取,还可以对SAR以及多通道图像进行读取,提供有转为`uint8`的选项。此外提供以下数据增强的方法。 +PaddleRS将多种任务需要的数据增强进行了有机整合,均通过`Compose`进行使用,数据读取方面通过`DecodeImg`可以对不只三通道RGB图像进行读取,还可以对SAR以及多通道图像进行读取,提供有转为`uint8`的选项。此外提供以下数据增强的方法。 | 数据增强名称 | 用途 | 任务 | ... | | -------------------- | ----------------------------------------------- | -------- | ---- | @@ -16,13 +16,13 @@ PaddleRS将多种任务需要的数据增强进行了有机整合,均通过`Co | RandomCrop | 对输入进行随机中心裁剪 | 所有 | ... | | RandomScaleAspect | 裁剪输入并重新调整大小至原始大小 | 所有 | ... | | RandomExpand | 通过根据随机偏移填充来随机扩展输入 | 所有 | ... | -| Padding | 将输入填充到指定的大小 | 所有 | ... | +| Pad | 将输入填充到指定的大小 | 所有 | ... | | MixupImage | 将两张图片和它们的`gt_bbbox/gt_score`混合在一起 | 目标检测 | ... | | RandomDistort | 对输入进行随机色彩变换 | 所有 | ... | | RandomBlur | 对输入进行随机模糊 | 所有 | ... | -| Defogging | 对输入图像进行去雾 | 所有 | ... | -| DimReducing | 对输入图像进行降维 | 所有 | ... | -| BandSelecting | 选择输入图像的波段 | 所有 | ... | +| Dehaze | 对输入图像进行去雾 | 所有 | ... | +| ReduceDim | 对输入图像进行降维 | 所有 | ... | +| SelectBand | 选择输入图像的波段 | 所有 | ... | | RandomSwap | 随机交换两个输入图像 | 变化检测 | ... | | ... | ... | | ... | diff --git a/docs/data/coco_tools_cn.md b/docs/data/coco_tools_cn.md index faf2ee10..362c4272 100644 --- a/docs/data/coco_tools_cn.md +++ b/docs/data/coco_tools_cn.md @@ -85,7 +85,7 @@ Args_show = True ------------------------------------------------Info------------------------------------------------ json read... -json keys: dict_keys(['info', 'licenses', 'images', 'annotations', 'categories']) +json keys: dict_keys(['info', 'licenses', 'images', 'annotations', 'categories']) ***********************info*********************** Content Type: dict @@ -409,7 +409,7 @@ Args_show = True ------------------------------------------------Info------------------------------------------------ json read... 
-json keys: dict_keys(['images', 'categories']) +json keys: dict_keys(['images', 'categories']) **********************images********************** Content Type: list diff --git a/docs/data/dataset_summary.md b/docs/data/dataset_summary.md index 0cdd2783..f36ca613 100644 --- a/docs/data/dataset_summary.md +++ b/docs/data/dataset_summary.md @@ -215,4 +215,4 @@ | [9-5](https://aistudio.baidu.com/aistudio/datasetdetail/136567) | [WHU TCL SatMVS 1.0](http://gpcv.whu.edu.cn/data/whu_tlc.html) | 图像生成 | 5120 * 5120 | 1 | 300 | __ | tif, jpg | __ | 2.1m, 2.5m | __ | 卫星影像 | ZY3 | 2021 | 武汉大学 | http://gpcv.whu.edu.cn/data/whu_tlc.html | https://aistudio.baidu.com/aistudio/datasetdetail/136567 | | [9-6](https://aistudio.baidu.com/aistudio/datasetdetail/136567) | [WHU TCL SatMVS 2.0](http://gpcv.whu.edu.cn/data/whu_tlc.html) | 图像生成 | 768 * 384 | 1 | 5011 | __ | tif | __ | 2.1m, 2.5m | __ | 卫星影像 | ZY3 | 2021 | 武汉大学 | http://gpcv.whu.edu.cn/data/whu_tlc.html | https://aistudio.baidu.com/aistudio/datasetdetail/136567 | | 9-7 | [DLR-ACD](https://www.dlr.de/eoc/en/desktopdefault.aspx/tabid-12760/22294_read-58354/) | 图像生成 | 3619 * 5226 | 3 | 33 | 1 | __ | __ | 0.045~ 0.15m | __ | 航拍影像 | 航拍影像 | 2019 | German Aerospace Center | https://www.dlr.de/eoc/en/desktopdefault.aspx/tabid-12760/22294_read-58354/ | | -| 9-8 | [SEN12MS-CR](https://mediatum.ub.tum.de/1554803) | 图像生成 | 256 * 256 | 13, 2 | 122218 | __ | __ | __ | __ | __ | 卫星影像 | Sentinel1, Sentinel2 | 2020 | TUM | https://mediatum.ub.tum.de/1554803 | | \ No newline at end of file +| 9-8 | [SEN12MS-CR](https://mediatum.ub.tum.de/1554803) | 图像生成 | 256 * 256 | 13, 2 | 122218 | __ | __ | __ | __ | __ | 卫星影像 | Sentinel1, Sentinel2 | 2020 | TUM | https://mediatum.ub.tum.de/1554803 | | diff --git a/docs/quick_start.md b/docs/quick_start.md index 8e8b6579..ff9556b8 100644 --- a/docs/quick_start.md +++ b/docs/quick_start.md @@ -39,4 +39,4 @@ python -m paddle.distributed.launch --gpus 0,1 tutorials/train/semantic_segmenta visualdl --logdir output/deeplabv3p_resnet50_multi_channel/vdl_log --port 8001 ``` -服务启动后,使用浏览器打开 https://0.0.0.0:8001 或 https://localhost:8001 \ No newline at end of file +服务启动后,使用浏览器打开 https://0.0.0.0:8001 或 https://localhost:8001 diff --git a/paddlers/__init__.py b/paddlers/__init__.py index 3e97e1ff..3b71c112 100644 --- a/paddlers/__init__.py +++ b/paddlers/__init__.py @@ -21,4 +21,4 @@ log_level = 2 -from . import tasks, datasets, transforms, utils, tools, models, deploy \ No newline at end of file +from . import tasks, datasets, transforms, utils, tools, models, deploy diff --git a/paddlers/custom_models/__init__.py b/paddlers/custom_models/__init__.py index ea0abdf8..cbfbb1ff 100644 --- a/paddlers/custom_models/__init__.py +++ b/paddlers/custom_models/__init__.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from . import cls, det, seg, gan, cd \ No newline at end of file +from . import cls, det, seg, gan, cd diff --git a/paddlers/custom_models/cd/backbones/__init__.py b/paddlers/custom_models/cd/backbones/__init__.py index eeae9aa1..29c8b7d1 100644 --- a/paddlers/custom_models/cd/backbones/__init__.py +++ b/paddlers/custom_models/cd/backbones/__init__.py @@ -10,4 +10,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. 
\ No newline at end of file +# limitations under the License. diff --git a/paddlers/custom_models/cd/bit.py b/paddlers/custom_models/cd/bit.py index af9f2f90..0b38fbe0 100644 --- a/paddlers/custom_models/cd/bit.py +++ b/paddlers/custom_models/cd/bit.py @@ -22,6 +22,15 @@ from .param_init import KaimingInitMixin +def calc_product(*args): + if len(args) < 1: + raise ValueError + ret = args[0] + for arg in args[1:]: + ret *= arg + return ret + + class BIT(nn.Layer): """ The BIT implementation based on PaddlePaddle. @@ -131,9 +140,10 @@ def __init__(self, def _get_semantic_tokens(self, x): b, c = x.shape[:2] att_map = self.conv_att(x) - att_map = att_map.reshape((b, self.token_len, 1, -1)) + att_map = att_map.reshape( + (b, self.token_len, 1, calc_product(*att_map.shape[2:]))) att_map = F.softmax(att_map, axis=-1) - x = x.reshape((b, 1, c, -1)) + x = x.reshape((b, 1, c, att_map.shape[-1])) tokens = (x * att_map).sum(-1) return tokens @@ -172,7 +182,7 @@ def forward(self, t1, t2): else: token1 = self._get_reshaped_tokens(x1) token2 = self._get_reshaped_tokens(x2) - + # Transformer encoder forward token = paddle.concat([token1, token2], axis=1) token = self.encode(token) @@ -253,6 +263,7 @@ def __init__(self, inner_dim = head_dim * n_heads self.n_heads = n_heads + self.head_dim = head_dim self.scale = dim**-0.5 self.apply_softmax = apply_softmax @@ -272,9 +283,10 @@ def forward(self, x, ref): k = self.fc_k(ref) v = self.fc_v(ref) - q = q.reshape((b, n, h, -1)).transpose((0, 2, 1, 3)) - k = k.reshape((b, paddle.shape(ref)[1], h, -1)).transpose((0, 2, 1, 3)) - v = v.reshape((b, paddle.shape(ref)[1], h, -1)).transpose((0, 2, 1, 3)) + q = q.reshape((b, n, h, self.head_dim)).transpose((0, 2, 1, 3)) + rn = ref.shape[1] + k = k.reshape((b, rn, h, self.head_dim)).transpose((0, 2, 1, 3)) + v = v.reshape((b, rn, h, self.head_dim)).transpose((0, 2, 1, 3)) mult = paddle.matmul(q, k, transpose_y=True) * self.scale diff --git a/paddlers/custom_models/cd/fc_ef.py b/paddlers/custom_models/cd/fc_ef.py index a0086882..a831485b 100644 --- a/paddlers/custom_models/cd/fc_ef.py +++ b/paddlers/custom_models/cd/fc_ef.py @@ -131,8 +131,7 @@ def forward(self, t1, t2): # Stage 4d x4d = self.upconv4(x4p) - pad4 = (0, paddle.shape(x43)[3] - paddle.shape(x4d)[3], 0, - paddle.shape(x43)[2] - paddle.shape(x4d)[2]) + pad4 = (0, x43.shape[3] - x4d.shape[3], 0, x43.shape[2] - x4d.shape[2]) x4d = paddle.concat([F.pad(x4d, pad=pad4, mode='replicate'), x43], 1) x43d = self.do43d(self.conv43d(x4d)) x42d = self.do42d(self.conv42d(x43d)) @@ -140,8 +139,7 @@ def forward(self, t1, t2): # Stage 3d x3d = self.upconv3(x41d) - pad3 = (0, paddle.shape(x33)[3] - paddle.shape(x3d)[3], 0, - paddle.shape(x33)[2] - paddle.shape(x3d)[2]) + pad3 = (0, x33.shape[3] - x3d.shape[3], 0, x33.shape[2] - x3d.shape[2]) x3d = paddle.concat([F.pad(x3d, pad=pad3, mode='replicate'), x33], 1) x33d = self.do33d(self.conv33d(x3d)) x32d = self.do32d(self.conv32d(x33d)) @@ -149,16 +147,14 @@ def forward(self, t1, t2): # Stage 2d x2d = self.upconv2(x31d) - pad2 = (0, paddle.shape(x22)[3] - paddle.shape(x2d)[3], 0, - paddle.shape(x22)[2] - paddle.shape(x2d)[2]) + pad2 = (0, x22.shape[3] - x2d.shape[3], 0, x22.shape[2] - x2d.shape[2]) x2d = paddle.concat([F.pad(x2d, pad=pad2, mode='replicate'), x22], 1) x22d = self.do22d(self.conv22d(x2d)) x21d = self.do21d(self.conv21d(x22d)) # Stage 1d x1d = self.upconv1(x21d) - pad1 = (0, paddle.shape(x12)[3] - paddle.shape(x1d)[3], 0, - paddle.shape(x12)[2] - paddle.shape(x1d)[2]) + pad1 = (0, x12.shape[3] - x1d.shape[3], 0, 
x12.shape[2] - x1d.shape[2]) x1d = paddle.concat([F.pad(x1d, pad=pad1, mode='replicate'), x12], 1) x12d = self.do12d(self.conv12d(x1d)) x11d = self.conv11d(x12d) diff --git a/paddlers/custom_models/cd/fc_siam_conc.py b/paddlers/custom_models/cd/fc_siam_conc.py index af705433..bbe2632f 100644 --- a/paddlers/custom_models/cd/fc_siam_conc.py +++ b/paddlers/custom_models/cd/fc_siam_conc.py @@ -154,8 +154,8 @@ def forward(self, t1, t2): # Decode # Stage 4d x4d = self.upconv4(x4p) - pad4 = (0, paddle.shape(x43_1)[3] - paddle.shape(x4d)[3], 0, - paddle.shape(x43_1)[2] - paddle.shape(x4d)[2]) + pad4 = (0, x43_1.shape[3] - x4d.shape[3], 0, + x43_1.shape[2] - x4d.shape[2]) x4d = paddle.concat( [F.pad(x4d, pad=pad4, mode='replicate'), x43_1, x43_2], 1) x43d = self.do43d(self.conv43d(x4d)) @@ -164,8 +164,8 @@ def forward(self, t1, t2): # Stage 3d x3d = self.upconv3(x41d) - pad3 = (0, paddle.shape(x33_1)[3] - paddle.shape(x3d)[3], 0, - paddle.shape(x33_1)[2] - paddle.shape(x3d)[2]) + pad3 = (0, x33_1.shape[3] - x3d.shape[3], 0, + x33_1.shape[2] - x3d.shape[2]) x3d = paddle.concat( [F.pad(x3d, pad=pad3, mode='replicate'), x33_1, x33_2], 1) x33d = self.do33d(self.conv33d(x3d)) @@ -174,8 +174,8 @@ def forward(self, t1, t2): # Stage 2d x2d = self.upconv2(x31d) - pad2 = (0, paddle.shape(x22_1)[3] - paddle.shape(x2d)[3], 0, - paddle.shape(x22_1)[2] - paddle.shape(x2d)[2]) + pad2 = (0, x22_1.shape[3] - x2d.shape[3], 0, + x22_1.shape[2] - x2d.shape[2]) x2d = paddle.concat( [F.pad(x2d, pad=pad2, mode='replicate'), x22_1, x22_2], 1) x22d = self.do22d(self.conv22d(x2d)) @@ -183,8 +183,8 @@ def forward(self, t1, t2): # Stage 1d x1d = self.upconv1(x21d) - pad1 = (0, paddle.shape(x12_1)[3] - paddle.shape(x1d)[3], 0, - paddle.shape(x12_1)[2] - paddle.shape(x1d)[2]) + pad1 = (0, x12_1.shape[3] - x1d.shape[3], 0, + x12_1.shape[2] - x1d.shape[2]) x1d = paddle.concat( [F.pad(x1d, pad=pad1, mode='replicate'), x12_1, x12_2], 1) x12d = self.do12d(self.conv12d(x1d)) diff --git a/paddlers/custom_models/cd/fc_siam_diff.py b/paddlers/custom_models/cd/fc_siam_diff.py index 9343cfed..b60b5dbf 100644 --- a/paddlers/custom_models/cd/fc_siam_diff.py +++ b/paddlers/custom_models/cd/fc_siam_diff.py @@ -154,8 +154,8 @@ def forward(self, t1, t2): # Decode # Stage 4d x4d = self.upconv4(x4p) - pad4 = (0, paddle.shape(x43_1)[3] - paddle.shape(x4d)[3], 0, - paddle.shape(x43_1)[2] - paddle.shape(x4d)[2]) + pad4 = (0, x43_1.shape[3] - x4d.shape[3], 0, + x43_1.shape[2] - x4d.shape[2]) x4d = F.pad(x4d, pad=pad4, mode='replicate') x4d = paddle.concat([x4d, paddle.abs(x43_1 - x43_2)], 1) x43d = self.do43d(self.conv43d(x4d)) @@ -164,8 +164,8 @@ def forward(self, t1, t2): # Stage 3d x3d = self.upconv3(x41d) - pad3 = (0, paddle.shape(x33_1)[3] - paddle.shape(x3d)[3], 0, - paddle.shape(x33_1)[2] - paddle.shape(x3d)[2]) + pad3 = (0, x33_1.shape[3] - x3d.shape[3], 0, + x33_1.shape[2] - x3d.shape[2]) x3d = F.pad(x3d, pad=pad3, mode='replicate') x3d = paddle.concat([x3d, paddle.abs(x33_1 - x33_2)], 1) x33d = self.do33d(self.conv33d(x3d)) @@ -174,8 +174,8 @@ def forward(self, t1, t2): # Stage 2d x2d = self.upconv2(x31d) - pad2 = (0, paddle.shape(x22_1)[3] - paddle.shape(x2d)[3], 0, - paddle.shape(x22_1)[2] - paddle.shape(x2d)[2]) + pad2 = (0, x22_1.shape[3] - x2d.shape[3], 0, + x22_1.shape[2] - x2d.shape[2]) x2d = F.pad(x2d, pad=pad2, mode='replicate') x2d = paddle.concat([x2d, paddle.abs(x22_1 - x22_2)], 1) x22d = self.do22d(self.conv22d(x2d)) @@ -183,8 +183,8 @@ def forward(self, t1, t2): # Stage 1d x1d = self.upconv1(x21d) - pad1 = (0, 
paddle.shape(x12_1)[3] - paddle.shape(x1d)[3], 0, - paddle.shape(x12_1)[2] - paddle.shape(x1d)[2]) + pad1 = (0, x12_1.shape[3] - x1d.shape[3], 0, + x12_1.shape[2] - x1d.shape[2]) x1d = F.pad(x1d, pad=pad1, mode='replicate') x1d = paddle.concat([x1d, paddle.abs(x12_1 - x12_2)], 1) x12d = self.do12d(self.conv12d(x1d)) diff --git a/paddlers/custom_models/cd/layers/__init__.py b/paddlers/custom_models/cd/layers/__init__.py index ed9d985f..9314c566 100644 --- a/paddlers/custom_models/cd/layers/__init__.py +++ b/paddlers/custom_models/cd/layers/__init__.py @@ -13,4 +13,4 @@ # limitations under the License. from .blocks import * -from .attention import ChannelAttention, SpatialAttention, CBAM \ No newline at end of file +from .attention import ChannelAttention, SpatialAttention, CBAM diff --git a/paddlers/custom_models/cd/layers/blocks.py b/paddlers/custom_models/cd/layers/blocks.py index c379d665..2661900f 100644 --- a/paddlers/custom_models/cd/layers/blocks.py +++ b/paddlers/custom_models/cd/layers/blocks.py @@ -140,12 +140,14 @@ def __init__(self, class MaxPool2x2(nn.MaxPool2D): def __init__(self, **kwargs): - super(MaxPool2x2, self).__init__(kernel_size=2, stride=(2, 2), padding=(0, 0), **kwargs) + super(MaxPool2x2, self).__init__( + kernel_size=2, stride=(2, 2), padding=(0, 0), **kwargs) class MaxUnPool2x2(nn.MaxUnPool2D): def __init__(self, **kwargs): - super(MaxUnPool2x2, self).__init__(kernel_size=2, stride=(2, 2), padding=(0, 0), **kwargs) + super(MaxUnPool2x2, self).__init__( + kernel_size=2, stride=(2, 2), padding=(0, 0), **kwargs) class ConvTransposed3x3(nn.Layer): diff --git a/paddlers/custom_models/cd/snunet.py b/paddlers/custom_models/cd/snunet.py index c73af29b..161a9a08 100644 --- a/paddlers/custom_models/cd/snunet.py +++ b/paddlers/custom_models/cd/snunet.py @@ -132,7 +132,7 @@ def forward(self, t1, t2): out = paddle.concat([x0_1, x0_2, x0_3, x0_4], 1) - intra = paddle.sum(paddle.stack([x0_1, x0_2, x0_3, x0_4]), axis=0) + intra = x0_1 + x0_2 + x0_3 + x0_4 m_intra = self.ca_intra(intra) out = self.ca_inter(out) * (out + paddle.tile(m_intra, (1, 4, 1, 1))) diff --git a/paddlers/custom_models/cls/condensenet_v2.py b/paddlers/custom_models/cls/condensenet_v2.py index fe057d63..2ca10739 100644 --- a/paddlers/custom_models/cls/condensenet_v2.py +++ b/paddlers/custom_models/cls/condensenet_v2.py @@ -39,7 +39,7 @@ def forward(self, x): b, c, _, _ = x.shape y = self.avg_pool(x).reshape((b, c)) y = self.fc(y).reshape((b, c, 1, 1)) - return x * y.expand_as(x) + return x * paddle.expand(y, shape=x.shape) class HS(nn.Layer): @@ -92,7 +92,7 @@ def ShuffleLayer(x, groups): # transpose x = x.transpose((0, 2, 1, 3, 4)) # reshape - x = x.reshape((batchsize, -1, height, width)) + x = x.reshape((batchsize, groups * channels_per_group, height, width)) return x @@ -104,7 +104,7 @@ def ShuffleLayerTrans(x, groups): # transpose x = x.transpose((0, 2, 1, 3, 4)) # reshape - x = x.reshape((batchsize, -1, height, width)) + x = x.reshape((batchsize, channels_per_group * groups, height, width)) return x @@ -374,7 +374,8 @@ def add_block(self, i, group_1x1, group_3x3, group_trans, bottleneck, def forward(self, x): features = self.features(x) - out = features.reshape((features.shape[0], -1)) + out = features.reshape((features.shape[0], features.shape[1] * + features.shape[2] * features.shape[3])) out = self.fc(out) out = self.fc_act(out) diff --git a/paddlers/custom_models/gan/generators/rcan.py b/paddlers/custom_models/gan/generators/rcan.py index e838dc2d..9de30c71 100644 --- 
a/paddlers/custom_models/gan/generators/rcan.py +++ b/paddlers/custom_models/gan/generators/rcan.py @@ -8,14 +8,15 @@ def default_conv(in_channels, out_channels, kernel_size, bias=True): - weight_attr = paddle.ParamAttr(initializer=paddle.nn.initializer.XavierUniform(), - need_clip =True) - return nn.Conv2D(in_channels, - out_channels, - kernel_size, - padding=(kernel_size // 2), - weight_attr=weight_attr, - bias_attr=bias) + weight_attr = paddle.ParamAttr( + initializer=paddle.nn.initializer.XavierUniform(), need_clip=True) + return nn.Conv2D( + in_channels, + out_channels, + kernel_size, + padding=(kernel_size // 2), + weight_attr=weight_attr, + bias_attr=bias) class MeanShift(nn.Conv2D): diff --git a/paddlers/custom_models/gan/rcan_model.py b/paddlers/custom_models/gan/rcan_model.py index 0676e756..781f46e1 100644 --- a/paddlers/custom_models/gan/rcan_model.py +++ b/paddlers/custom_models/gan/rcan_model.py @@ -27,6 +27,7 @@ class RCANModel(BaseModel): """Base SR model for single image super-resolution. """ + def __init__(self, generator, pixel_criterion=None, use_init_weight=False): """ Args: @@ -69,16 +70,14 @@ def train_iter(self, optims=None): loss_pixel.backward() optims['optim'].step() else: - print('Skip this batch {}! (Loss: {})'.format( - self.batch + 1, loss_pixel.item() - )) + print('Skip this batch {}! (Loss: {})'.format(self.batch + 1, + loss_pixel.item())) self.batch += 1 if self.batch % 1000 == 0: - self.error_last = loss_pixel.item()/1000 + self.error_last = loss_pixel.item() / 1000 print("update error_last:{}".format(self.error_last)) - def test_iter(self, metrics=None): self.nets['generator'].eval() with paddle.no_grad(): @@ -99,8 +98,8 @@ def test_iter(self, metrics=None): def init_sr_weight(net): def reset_func(m): - if hasattr(m, 'weight') and (not isinstance( - m, (nn.BatchNorm, nn.BatchNorm2D))): + if hasattr(m, 'weight') and ( + not isinstance(m, (nn.BatchNorm, nn.BatchNorm2D))): reset_parameters(m) net.apply(reset_func) diff --git a/paddlers/custom_models/seg/farseg.py b/paddlers/custom_models/seg/farseg.py index 2e0161f8..ce487459 100644 --- a/paddlers/custom_models/seg/farseg.py +++ b/paddlers/custom_models/seg/farseg.py @@ -32,7 +32,7 @@ class FPN(nn.Layer): """ Module that adds FPN on top of a list of feature maps. 
The feature maps are currently supposed to be in increasing depth - order, and must be consecutive + order, and must be consecutive """ def __init__(self, @@ -41,38 +41,35 @@ def __init__(self, conv_block=ConvReLU, top_blocks=None): super(FPN, self).__init__() - self.inner_blocks = [] - self.layer_blocks = [] + + inner_blocks = [] + layer_blocks = [] for idx, in_channels in enumerate(in_channels_list, 1): - inner_block = "fpn_inner{}".format(idx) - layer_block = "fpn_layer{}".format(idx) if in_channels == 0: continue inner_block_module = conv_block(in_channels, out_channels, 1) layer_block_module = conv_block(out_channels, out_channels, 3, 1) - self.add_sublayer(inner_block, inner_block_module) - self.add_sublayer(layer_block, layer_block_module) for module in [inner_block_module, layer_block_module]: for m in module.sublayers(): if isinstance(m, nn.Conv2D): kaiming_normal_init(m.weight) - self.inner_blocks.append(inner_block) - self.layer_blocks.append(layer_block) + inner_blocks.append(inner_block_module) + layer_blocks.append(layer_block_module) + self.inner_blocks = nn.LayerList(inner_blocks) + self.layer_blocks = nn.LayerList(layer_blocks) self.top_blocks = top_blocks def forward(self, x): - last_inner = getattr(self, self.inner_blocks[-1])(x[-1]) - results = [getattr(self, self.layer_blocks[-1])(last_inner)] - for feature, inner_block, layer_block in zip( - x[:-1][::-1], self.inner_blocks[:-1][::-1], - self.layer_blocks[:-1][::-1]): - if not inner_block: - continue + last_inner = self.inner_blocks[-1](x[-1]) + results = [self.layer_blocks[-1](last_inner)] + for i, feature in enumerate(x[-2::-1]): + inner_block = self.inner_blocks[len(self.inner_blocks) - 2 - i] + layer_block = self.layer_blocks[len(self.layer_blocks) - 2 - i] inner_top_down = F.interpolate( last_inner, scale_factor=2, mode="nearest") - inner_lateral = getattr(self, inner_block)(feature) + inner_lateral = inner_block(feature) last_inner = inner_lateral + inner_top_down - results.insert(0, getattr(self, layer_block)(last_inner)) + results.insert(0, layer_block(last_inner)) if isinstance(self.top_blocks, LastLevelP6P7): last_results = self.top_blocks(x[-1], results[-1]) results.extend(last_results) diff --git a/paddlers/datasets/__init__.py b/paddlers/datasets/__init__.py index 145fe304..a8aeca07 100644 --- a/paddlers/datasets/__init__.py +++ b/paddlers/datasets/__init__.py @@ -17,4 +17,4 @@ from .seg_dataset import SegDataset from .cd_dataset import CDDataset from .clas_dataset import ClasDataset -from .sr_dataset import SRdataset, ComposeTrans \ No newline at end of file +from .sr_dataset import SRdataset, ComposeTrans diff --git a/paddlers/datasets/cd_dataset.py b/paddlers/datasets/cd_dataset.py index 4f11d06b..c0576ad0 100644 --- a/paddlers/datasets/cd_dataset.py +++ b/paddlers/datasets/cd_dataset.py @@ -17,6 +17,7 @@ import os.path as osp from paddle.io import Dataset + from paddlers.utils import logging, get_num_workers, get_encoding, path_normalization, is_pic diff --git a/paddlers/datasets/clas_dataset.py b/paddlers/datasets/clas_dataset.py index bf21ed40..172513dd 100644 --- a/paddlers/datasets/clas_dataset.py +++ b/paddlers/datasets/clas_dataset.py @@ -16,6 +16,7 @@ import copy from paddle.io import Dataset + from paddlers.utils import logging, get_num_workers, get_encoding, path_normalization, is_pic diff --git a/paddlers/datasets/coco.py b/paddlers/datasets/coco.py index 667c01bb..b4fc845f 100644 --- a/paddlers/datasets/coco.py +++ b/paddlers/datasets/coco.py @@ -23,7 +23,7 @@ from paddle.io import Dataset 
from paddlers.utils import logging, get_num_workers, get_encoding, path_normalization, is_pic -from paddlers.transforms import ImgDecoder, MixupImage +from paddlers.transforms import DecodeImg, MixupImage from paddlers.tools import YOLOAnchorCluster @@ -256,8 +256,8 @@ def __getitem__(self, idx): if self.data_fields is not None: sample_mix = {k: sample_mix[k] for k in self.data_fields} sample = self.mixup_op(sample=[ - ImgDecoder(to_rgb=False)(sample), - ImgDecoder(to_rgb=False)(sample_mix) + DecodeImg(to_rgb=False)(sample), + DecodeImg(to_rgb=False)(sample_mix) ]) sample = self.transforms(sample) return sample diff --git a/paddlers/datasets/voc.py b/paddlers/datasets/voc.py index 2ce2f36c..1876910e 100644 --- a/paddlers/datasets/voc.py +++ b/paddlers/datasets/voc.py @@ -25,7 +25,7 @@ from paddle.io import Dataset from paddlers.utils import logging, get_num_workers, get_encoding, path_normalization, is_pic -from paddlers.transforms import ImgDecoder, MixupImage +from paddlers.transforms import DecodeImg, MixupImage from paddlers.tools import YOLOAnchorCluster @@ -320,8 +320,8 @@ def __getitem__(self, idx): if self.data_fields is not None: sample_mix = {k: sample_mix[k] for k in self.data_fields} sample = self.mixup_op(sample=[ - ImgDecoder(to_rgb=False)(sample), - ImgDecoder(to_rgb=False)(sample_mix) + DecodeImg(to_rgb=False)(sample), + DecodeImg(to_rgb=False)(sample_mix) ]) sample = self.transforms(sample) return sample diff --git a/paddlers/deploy/predictor.py b/paddlers/deploy/predictor.py index 96fdb975..2bc3d382 100644 --- a/paddlers/deploy/predictor.py +++ b/paddlers/deploy/predictor.py @@ -175,9 +175,9 @@ def postprocess(self, net_outputs, topk=1, ori_shape=None, transforms=None): if self._model._postprocess is None: self._model.build_postprocess_from_labels(topk) # XXX: Convert ndarray to tensor as self._model._postprocess requires - net_outputs = paddle.to_tensor(net_outputs) - assert net_outputs.shape[1] == 1 - outputs = self._model._postprocess(net_outputs.squeeze(1)) + assert len(net_outputs) == 1 + net_outputs = paddle.to_tensor(net_outputs[0]) + outputs = self._model._postprocess(net_outputs) class_ids = map(itemgetter('class_ids'), outputs) scores = map(itemgetter('scores'), outputs) label_names = map(itemgetter('label_names'), outputs) @@ -252,22 +252,26 @@ def predict(self, transforms=None, warmup_iters=0, repeats=1): - """ 图片预测 + """ + Do prediction. + Args: - img_file(List[str or tuple or np.ndarray], str, tuple, or np.ndarray): - 对于场景分类、图像复原、目标检测和语义分割任务来说,该参数可为单一图像路径,或是解码后的、排列格式为(H, W, C) - 且具有float32类型的BGR图像(表示为numpy的ndarray形式),或者是一组图像路径或np.ndarray对象构成的列表;对于变化检测 - 任务来说,该参数可以为图像路径二元组(分别表示前后两个时相影像路径),或是两幅图像组成的二元组,或者是上述两种二元组 - 之一构成的列表。 - topk(int): 场景分类模型预测时使用,表示预测前topk的结果。默认值为1。 - transforms (paddlers.transforms): 数据预处理操作。默认值为None, 即使用`model.yml`中保存的数据预处理操作。 - warmup_iters (int): 预热轮数,用于评估模型推理以及前后处理速度。若大于1,会预先重复预测warmup_iters,而后才开始正式的预测及其速度评估。默认为0。 - repeats (int): 重复次数,用于评估模型推理以及前后处理速度。若大于1,会预测repeats次取时间平均值。默认值为1。 + img_file(list[str | tuple | np.ndarray] | str | tuple | np.ndarray): For scene classification, image restoration, + object detection and semantic segmentation tasks, `img_file` should be either the path of the image to predict + , a decoded image (a `np.ndarray`, which should be consistent with what you get from passing image path to + `paddlers.transforms.decode_image()`), or a list of image paths or decoded images. For change detection tasks, + `img_file` should be a tuple of image paths, a tuple of decoded images, or a list of tuples. 
+ topk(int, optional): Top-k values to reserve in a classification result. Defaults to 1. + transforms (paddlers.transforms.Compose | None, optional): Pipeline of data preprocessing. If None, load transforms + from `model.yml`. Defaults to None. + warmup_iters (int, optional): Warm-up iterations before measuring the execution time. Defaults to 0. + repeats (int, optional): Number of repetitions to evaluate model inference and data processing speed. If greater than + 1, the reported time consumption is the average of all repeats. Defaults to 1. """ if repeats < 1: logging.error("`repeats` must be greater than 1.", exit=True) if transforms is None and not hasattr(self._model, 'test_transforms'): - raise Exception("Transforms need to be defined, now is None.") + raise ValueError("Transforms need to be defined, now is None.") if transforms is None: transforms = self._model.test_transforms if isinstance(img_file, tuple) and len(img_file) != 2: diff --git a/paddlers/models/__init__.py b/paddlers/models/__init__.py index 9ddd0975..345e589c 100644 --- a/paddlers/models/__init__.py +++ b/paddlers/models/__init__.py @@ -12,4 +12,4 @@ # See the License for the specific language governing permissions and # limitations under the License. -from . import ppcls, ppdet, ppseg, ppgan \ No newline at end of file +from . import ppcls, ppdet, ppseg, ppgan diff --git a/paddlers/models/ppcls/__init__.py b/paddlers/models/ppcls/__init__.py index a906591d..2ba76e9b 100644 --- a/paddlers/models/ppcls/__init__.py +++ b/paddlers/models/ppcls/__init__.py @@ -20,4 +20,4 @@ from .arch import * from .optimizer import * from .data import * -from .utils import * \ No newline at end of file +from .utils import * diff --git a/paddlers/models/ppcls/arch/backbone/model_zoo/xception.py b/paddlers/models/ppcls/arch/backbone/model_zoo/xception.py index 2b843788..f7d0931b 100644 --- a/paddlers/models/ppcls/arch/backbone/model_zoo/xception.py +++ b/paddlers/models/ppcls/arch/backbone/model_zoo/xception.py @@ -201,22 +201,14 @@ def __init__(self, block_num=8): super(MiddleFlow, self).__init__() self.block_num = block_num - self._conv_0 = MiddleFlowBottleneckBlock( - 728, 728, name="middle_flow_0") - self._conv_1 = MiddleFlowBottleneckBlock( - 728, 728, name="middle_flow_1") - self._conv_2 = MiddleFlowBottleneckBlock( - 728, 728, name="middle_flow_2") - self._conv_3 = MiddleFlowBottleneckBlock( - 728, 728, name="middle_flow_3") - self._conv_4 = MiddleFlowBottleneckBlock( - 728, 728, name="middle_flow_4") - self._conv_5 = MiddleFlowBottleneckBlock( - 728, 728, name="middle_flow_5") - self._conv_6 = MiddleFlowBottleneckBlock( - 728, 728, name="middle_flow_6") - self._conv_7 = MiddleFlowBottleneckBlock( - 728, 728, name="middle_flow_7") + self._conv_0 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_0") + self._conv_1 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_1") + self._conv_2 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_2") + self._conv_3 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_3") + self._conv_4 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_4") + self._conv_5 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_5") + self._conv_6 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_6") + self._conv_7 = MiddleFlowBottleneckBlock(728, 728, name="middle_flow_7") if block_num == 16: self._conv_8 = MiddleFlowBottleneckBlock( 728, 728, name="middle_flow_8") @@ -297,8 +289,7 @@ def __init__(self, class_num): name = "exit_flow" - self._conv_0 = ExitFlowBottleneckBlock( - 728, 
728, 1024, name=name + "_1") + self._conv_0 = ExitFlowBottleneckBlock(728, 728, 1024, name=name + "_1") self._conv_1 = SeparableConv(1024, 1536, stride=1, name=name + "_2") self._conv_2 = SeparableConv(1536, 2048, stride=1, name=name + "_3") self._pool = AdaptiveAvgPool2D(1) @@ -362,16 +353,14 @@ def Xception41(pretrained=False, use_ssld=False, **kwargs): def Xception65(pretrained=False, use_ssld=False, **kwargs): - model = Xception( - entry_flow_block_num=3, middle_flow_block_num=16, **kwargs) + model = Xception(entry_flow_block_num=3, middle_flow_block_num=16, **kwargs) _load_pretrained( pretrained, model, MODEL_URLS["Xception65"], use_ssld=use_ssld) return model def Xception71(pretrained=False, use_ssld=False, **kwargs): - model = Xception( - entry_flow_block_num=5, middle_flow_block_num=16, **kwargs) + model = Xception(entry_flow_block_num=5, middle_flow_block_num=16, **kwargs) _load_pretrained( pretrained, model, MODEL_URLS["Xception71"], use_ssld=use_ssld) return model diff --git a/paddlers/models/ppcls/data/utils/__init__.py b/paddlers/models/ppcls/data/utils/__init__.py index 61d5aa21..185a92b8 100644 --- a/paddlers/models/ppcls/data/utils/__init__.py +++ b/paddlers/models/ppcls/data/utils/__init__.py @@ -10,4 +10,4 @@ # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and -# limitations under the License. \ No newline at end of file +# limitations under the License. diff --git a/paddlers/models/ppcls/loss/deephashloss.py b/paddlers/models/ppcls/loss/deephashloss.py index c9a58dc7..0fecb632 100644 --- a/paddlers/models/ppcls/loss/deephashloss.py +++ b/paddlers/models/ppcls/loss/deephashloss.py @@ -15,6 +15,7 @@ import paddle import paddle.nn as nn + class DSHSDLoss(nn.Layer): """ # DSHSD(IEEE ACCESS 2019) @@ -23,6 +24,7 @@ class DSHSDLoss(nn.Layer): # [DSHSD] epoch:250, bit:48, dataset:nuswide_21, MAP:0.809, Best MAP: 0.815 # [DSHSD] epoch:135, bit:48, dataset:imagenet, MAP:0.647, Best MAP: 0.647 """ + def __init__(self, alpha, multi_label=False): super(DSHSDLoss, self).__init__() self.alpha = alpha @@ -65,6 +67,7 @@ class LCDSHLoss(nn.Layer): # [LCDSH] epoch:145, bit:48, dataset:cifar10-1, MAP:0.798, Best MAP: 0.798 # [LCDSH] epoch:183, bit:48, dataset:nuswide_21, MAP:0.833, Best MAP: 0.834 """ + def __init__(self, n_class, _lambda): super(LCDSHLoss, self).__init__() self._lambda = _lambda @@ -75,9 +78,11 @@ def forward(self, input, label): # label to ont-hot label = paddle.flatten(label) - label = paddle.nn.functional.one_hot(label, self.n_class).astype("float32") - - s = 2 * (paddle.matmul(label, label, transpose_y=True) > 0).astype("float32") - 1 + label = paddle.nn.functional.one_hot(label, + self.n_class).astype("float32") + + s = 2 * (paddle.matmul( + label, label, transpose_y=True) > 0).astype("float32") - 1 inner_product = paddle.matmul(feature, feature, transpose_y=True) * 0.5 inner_product = inner_product.clip(min=-50, max=50) @@ -89,4 +94,3 @@ def forward(self, input, label): L2 = (sigmoid(inner_product) - sigmoid(inner_product_)).pow(2).mean() return {"lcdshloss": L1 + self._lambda * L2} - diff --git a/paddlers/models/ppcls/loss/googlenetloss.py b/paddlers/models/ppcls/loss/googlenetloss.py index c580aa61..c26915b0 100644 --- a/paddlers/models/ppcls/loss/googlenetloss.py +++ b/paddlers/models/ppcls/loss/googlenetloss.py @@ -19,10 +19,11 @@ class GoogLeNetLoss(nn.Layer): """ Cross entropy loss used after googlenet 
""" + def __init__(self, epsilon=None): super().__init__() - assert (epsilon is None or epsilon <= 0 or epsilon >= 1), "googlenet is not support label_smooth" - + assert (epsilon is None or epsilon <= 0 or + epsilon >= 1), "googlenet is not support label_smooth" def forward(self, inputs, label): input0, input1, input2 = inputs diff --git a/paddlers/models/ppcls/loss/multilabelloss.py b/paddlers/models/ppcls/loss/multilabelloss.py index d30d5b8d..4ca76418 100644 --- a/paddlers/models/ppcls/loss/multilabelloss.py +++ b/paddlers/models/ppcls/loss/multilabelloss.py @@ -26,11 +26,9 @@ def _labelsmoothing(self, target, class_num): def _binary_crossentropy(self, input, target, class_num): if self.epsilon is not None: target = self._labelsmoothing(target, class_num) - cost = F.binary_cross_entropy_with_logits( - logit=input, label=target) + cost = F.binary_cross_entropy_with_logits(logit=input, label=target) else: - cost = F.binary_cross_entropy_with_logits( - logit=input, label=target) + cost = F.binary_cross_entropy_with_logits(logit=input, label=target) return cost diff --git a/paddlers/models/ppcls/loss/pairwisecosface.py b/paddlers/models/ppcls/loss/pairwisecosface.py index beb80686..8969cf92 100644 --- a/paddlers/models/ppcls/loss/pairwisecosface.py +++ b/paddlers/models/ppcls/loss/pairwisecosface.py @@ -36,8 +36,10 @@ def forward(self, embedding, targets): dist_mat = paddle.matmul(embedding, embedding, transpose_y=True) N = dist_mat.shape[0] - is_pos = targets.reshape([N,1]).expand([N,N]).equal(paddle.t(targets.reshape([N,1]).expand([N,N]))).astype('float') - is_neg = targets.reshape([N,1]).expand([N,N]).not_equal(paddle.t(targets.reshape([N,1]).expand([N,N]))).astype('float') + is_pos = targets.reshape([N, 1]).expand([N, N]).equal( + paddle.t(targets.reshape([N, 1]).expand([N, N]))).astype('float') + is_neg = targets.reshape([N, 1]).expand([N, N]).not_equal( + paddle.t(targets.reshape([N, 1]).expand([N, N]))).astype('float') # Mask scores related to itself is_pos = is_pos - paddle.eye(N, N) @@ -48,8 +50,9 @@ def forward(self, embedding, targets): logit_p = -self.gamma * s_p + (-99999999.) * (1 - is_pos) logit_n = self.gamma * (s_n + self.margin) + (-99999999.) 
* (1 - is_neg) - loss = F.softplus(paddle.logsumexp(logit_p, axis=1) + paddle.logsumexp(logit_n, axis=1)).mean() - - return {"PairwiseCosface": loss} - + loss = F.softplus( + paddle.logsumexp( + logit_p, axis=1) + paddle.logsumexp( + logit_n, axis=1)).mean() + return {"PairwiseCosface": loss} diff --git a/paddlers/models/ppcls/loss/supconloss.py b/paddlers/models/ppcls/loss/supconloss.py index 3dd33bc1..585c0e02 100644 --- a/paddlers/models/ppcls/loss/supconloss.py +++ b/paddlers/models/ppcls/loss/supconloss.py @@ -58,8 +58,7 @@ def forward(self, features, labels, mask=None): elif labels is not None: labels = labels.reshape([-1, 1]) if labels.shape[0] != batch_size: - raise ValueError( - 'Num of labels does not match num of features') + raise ValueError('Num of labels does not match num of features') mask = paddle.cast( paddle.equal(labels, paddle.t(labels)), 'float32') else: diff --git a/paddlers/models/ppcls/metric/__init__.py b/paddlers/models/ppcls/metric/__init__.py index 94721235..b85bdd57 100644 --- a/paddlers/models/ppcls/metric/__init__.py +++ b/paddlers/models/ppcls/metric/__init__.py @@ -34,8 +34,7 @@ def __init__(self, config_list): metric_name = list(config)[0] metric_params = config[metric_name] if metric_params is not None: - self.metric_func_list.append( - eval(metric_name)(**metric_params)) + self.metric_func_list.append(eval(metric_name)(**metric_params)) else: self.metric_func_list.append(eval(metric_name)()) diff --git a/paddlers/models/ppcls/utils/download.py b/paddlers/models/ppcls/utils/download.py index 9c457504..e70f2824 100644 --- a/paddlers/models/ppcls/utils/download.py +++ b/paddlers/models/ppcls/utils/download.py @@ -120,8 +120,7 @@ def get_path_from_url(url, # Mainly used to solve the problem of downloading data from different # machines in the case of multiple machines. Different ips will download # data, and the same ip will only download data once. - unique_endpoints = _get_unique_endpoints(ParallelEnv() - .trainer_endpoints[:]) + unique_endpoints = _get_unique_endpoints(ParallelEnv().trainer_endpoints[:]) if osp.exists(fullpath) and check_exist and _md5check(fullpath, md5sum): logger.info("Found {}".format(fullpath)) else: diff --git a/paddlers/models/ppdet/metrics/json_results.py b/paddlers/models/ppdet/metrics/json_results.py index 3d7054de..aab0fbec 100755 --- a/paddlers/models/ppdet/metrics/json_results.py +++ b/paddlers/models/ppdet/metrics/json_results.py @@ -141,8 +141,8 @@ def get_keypoint_res(results, im_id): } x = kpt[0::3] y = kpt[1::3] - x0, x1, y0, y1 = np.min(x).item(), np.max(x).item(), np.min( - y).item(), np.max(y).item() + x0, x1, y0, y1 = np.min(x).item(), np.max(x).item(), np.min(y).item( + ), np.max(y).item() ann['area'] = (x1 - x0) * (y1 - y0) ann['bbox'] = [x0, y0, x1 - x0, y1 - y0] anns.append(ann) diff --git a/paddlers/models/ppdet/modeling/architectures/meta_arch.py b/paddlers/models/ppdet/modeling/architectures/meta_arch.py index 1ab14485..77db52b8 100644 --- a/paddlers/models/ppdet/modeling/architectures/meta_arch.py +++ b/paddlers/models/ppdet/modeling/architectures/meta_arch.py @@ -25,8 +25,7 @@ def load_meanstd(self, cfg_transform): self.scale = 1. 
self.mean = paddle.to_tensor([0.485, 0.456, 0.406]).reshape( (1, 3, 1, 1)) - self.std = paddle.to_tensor([0.229, 0.224, 0.225]).reshape( - (1, 3, 1, 1)) + self.std = paddle.to_tensor([0.229, 0.224, 0.225]).reshape((1, 3, 1, 1)) for item in cfg_transform: if 'NormalizeImage' in item: self.mean = paddle.to_tensor(item['NormalizeImage'][ @@ -83,8 +82,7 @@ def merge_multi_scale_predictions(self, outs): nms_threshold = 0.5 keep_top_k = 100 - if self.__class__.__name__ in ('CascadeRCNN', 'FasterRCNN', 'MaskRCNN' - ): + if self.__class__.__name__ in ('CascadeRCNN', 'FasterRCNN', 'MaskRCNN'): num_classes = self.bbox_head.num_classes keep_top_k = self.bbox_post_process.nms.keep_top_k nms_threshold = self.bbox_post_process.nms.nms_threshold diff --git a/paddlers/models/ppdet/modeling/backbones/vgg.py b/paddlers/models/ppdet/modeling/backbones/vgg.py index d383677c..1b9e19ac 100755 --- a/paddlers/models/ppdet/modeling/backbones/vgg.py +++ b/paddlers/models/ppdet/modeling/backbones/vgg.py @@ -109,12 +109,12 @@ def forward(self, inputs): @register @serializable class VGG(nn.Layer): - def __init__( - self, - depth=16, - normalizations=[20., -1, -1, -1, -1, -1], - extra_block_filters=[[256, 512, 1, 2, 3], [128, 256, 1, 2, 3], - [128, 256, 0, 1, 3], [128, 256, 0, 1, 3]]): + def __init__(self, + depth=16, + normalizations=[20., -1, -1, -1, -1, -1], + extra_block_filters=[[256, 512, 1, 2, 3], [128, 256, 1, 2, 3], + [128, 256, 0, 1, 3], + [128, 256, 0, 1, 3]]): super(VGG, self).__init__() assert depth in [16, 19], \ @@ -159,8 +159,8 @@ def __init__( for i, v in enumerate(self.extra_block_filters): assert len(v) == 5, "extra_block_filters size not fix" extra_conv = self.add_sublayer("conv{}".format(6 + i), - ExtraBlock(last_channels, v[0], - v[1], v[2], v[3], v[4])) + ExtraBlock(last_channels, v[0], v[1], + v[2], v[3], v[4])) last_channels = v[1] self.extra_convs.append(extra_conv) self._out_channels.append(last_channels) diff --git a/paddlers/models/ppdet/modeling/bbox_utils.py b/paddlers/models/ppdet/modeling/bbox_utils.py index b2ab46cf..d5d376cb 100644 --- a/paddlers/models/ppdet/modeling/bbox_utils.py +++ b/paddlers/models/ppdet/modeling/bbox_utils.py @@ -265,8 +265,7 @@ def decode_yolo(box, anchor, downsample_ratio): """ x, y, w, h = box na, grid_h, grid_w = x.shape[1:4] - grid = make_grid(grid_h, grid_w, x.dtype).reshape( - (1, 1, grid_h, grid_w, 2)) + grid = make_grid(grid_h, grid_w, x.dtype).reshape((1, 1, grid_h, grid_w, 2)) x1 = (x + grid[:, :, :, :, 0:1]) / grid_w y1 = (y + grid[:, :, :, :, 1:2]) / grid_h @@ -345,8 +344,7 @@ def bbox_iou(box1, box2, giou=False, diou=False, ciou=False, eps=1e-9): # convex diagonal squared c2 = cw**2 + ch**2 + eps # center distance - rho2 = ( - (px1 + px2 - gx1 - gx2)**2 + (py1 + py2 - gy1 - gy2)**2) / 4 + rho2 = ((px1 + px2 - gx1 - gx2)**2 + (py1 + py2 - gy1 - gy2)**2) / 4 if diou: return iou - rho2 / c2 else: @@ -461,8 +459,8 @@ def rbox2delta(proposals, gt, means=[0, 0, 0, 0, 0], stds=[1, 1, 1, 1, 1]): coord = gt[..., 0:2] - proposals[..., 0:2] dx = (np.cos(proposals[..., 4]) * coord[..., 0] + np.sin(proposals[..., 4]) * coord[..., 1]) / proposals_widths - dy = (-np.sin(proposals[..., 4]) * coord[..., 0] + - np.cos(proposals[..., 4]) * coord[..., 1]) / proposals_heights + dy = (-np.sin(proposals[..., 4]) * coord[..., 0] + np.cos(proposals[..., 4]) + * coord[..., 1]) / proposals_heights dw = np.log(gt_widths / proposals_widths) dh = np.log(gt_heights / proposals_heights) da = (gt_angle - proposals_angle) @@ -553,8 +551,7 @@ def norm_angle(angle, range=[-np.pi / 4, 
np.pi]): def cal_line_length(point1, point2): import math return math.sqrt( - math.pow(point1[0] - point2[0], 2) + math.pow(point1[1] - point2[1], - 2)) + math.pow(point1[0] - point2[0], 2) + math.pow(point1[1] - point2[1], 2)) def get_best_begin_point_single(coordinate): diff --git a/paddlers/models/ppdet/modeling/layers.py b/paddlers/models/ppdet/modeling/layers.py index d4c7b791..6a0cca15 100644 --- a/paddlers/models/ppdet/modeling/layers.py +++ b/paddlers/models/ppdet/modeling/layers.py @@ -170,12 +170,10 @@ def __init__(self, norm_lr = 0. if freeze_norm else 1. param_attr = ParamAttr( learning_rate=norm_lr, - regularizer=L2Decay(norm_decay) - if norm_decay is not None else None) + regularizer=L2Decay(norm_decay) if norm_decay is not None else None) bias_attr = ParamAttr( learning_rate=norm_lr, - regularizer=L2Decay(norm_decay) - if norm_decay is not None else None) + regularizer=L2Decay(norm_decay) if norm_decay is not None else None) if norm_type in ['bn', 'sync_bn']: self.norm = nn.BatchNorm2D( ch_out, weight_attr=param_attr, bias_attr=bias_attr) @@ -293,19 +291,18 @@ def forward(self, x): @register @serializable class AnchorGeneratorSSD(object): - def __init__( - self, - steps=[8, 16, 32, 64, 100, 300], - aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]], - min_ratio=15, - max_ratio=90, - base_size=300, - min_sizes=[30.0, 60.0, 111.0, 162.0, 213.0, 264.0], - max_sizes=[60.0, 111.0, 162.0, 213.0, 264.0, 315.0], - offset=0.5, - flip=True, - clip=False, - min_max_aspect_ratios_order=False): + def __init__(self, + steps=[8, 16, 32, 64, 100, 300], + aspect_ratios=[[2.], [2., 3.], [2., 3.], [2., 3.], [2.], [2.]], + min_ratio=15, + max_ratio=90, + base_size=300, + min_sizes=[30.0, 60.0, 111.0, 162.0, 213.0, 264.0], + max_sizes=[60.0, 111.0, 162.0, 213.0, 264.0, 315.0], + offset=0.5, + flip=True, + clip=False, + min_max_aspect_ratios_order=False): self.steps = steps self.aspect_ratios = aspect_ratios self.min_ratio = min_ratio @@ -1035,19 +1032,16 @@ def __call__(self, seg_masks = paddle.flatten(seg_masks, start_axis=1, stop_axis=-1) # inter. - inter_matrix = paddle.mm(seg_masks, - paddle.transpose(seg_masks, [1, 0])) + inter_matrix = paddle.mm(seg_masks, paddle.transpose(seg_masks, [1, 0])) n_samples = paddle.shape(cate_labels) # union. sum_masks_x = paddle.expand(sum_masks, shape=[n_samples, n_samples]) # iou. iou_matrix = (inter_matrix / ( - sum_masks_x + paddle.transpose(sum_masks_x, [1, 0]) - inter_matrix) - ) + sum_masks_x + paddle.transpose(sum_masks_x, [1, 0]) - inter_matrix)) iou_matrix = paddle.triu(iou_matrix, diagonal=1) # label_specific matrix. 
- cate_labels_x = paddle.expand( - cate_labels, shape=[n_samples, n_samples]) + cate_labels_x = paddle.expand(cate_labels, shape=[n_samples, n_samples]) label_matrix = paddle.cast( (cate_labels_x == paddle.transpose(cate_labels_x, [1, 0])), 'float32') @@ -1304,8 +1298,8 @@ def compute_qkv(self, tensor, index): if self._qkv_same_embed_dim: tensor = F.linear( x=tensor, - weight=self.in_proj_weight[:, index * self.embed_dim:( - index + 1) * self.embed_dim], + weight=self.in_proj_weight[:, index * self.embed_dim:(index + 1) + * self.embed_dim], bias=self.in_proj_bias[index * self.embed_dim:(index + 1) * self.embed_dim] if self.in_proj_bias is not None else None) diff --git a/paddlers/models/ppdet/modeling/ops.py b/paddlers/models/ppdet/modeling/ops.py index 552e5224..005a1313 100644 --- a/paddlers/models/ppdet/modeling/ops.py +++ b/paddlers/models/ppdet/modeling/ops.py @@ -690,13 +690,12 @@ def yolo_box( if not isinstance(class_num, int): raise TypeError("Attr class_num of yolo_box must be an integer") if not isinstance(conf_thresh, float): - raise TypeError( - "Attr ignore_thresh of yolo_box must be a float number") + raise TypeError("Attr ignore_thresh of yolo_box must be a float number") if in_dygraph_mode(): attrs = ('anchors', anchors, 'class_num', class_num, 'conf_thresh', - conf_thresh, 'downsample_ratio', downsample_ratio, - 'clip_bbox', clip_bbox, 'scale_x_y', scale_x_y) + conf_thresh, 'downsample_ratio', downsample_ratio, 'clip_bbox', + clip_bbox, 'scale_x_y', scale_x_y) boxes, scores = core.ops.yolo_box(x, origin_shape, *attrs) return boxes, scores else: @@ -978,8 +977,8 @@ class number score_threshold, 'nms_top_k', nms_top_k, 'nms_threshold', nms_threshold, 'keep_top_k', keep_top_k, 'nms_eta', nms_eta, 'normalized', normalized) - output, index, nms_rois_num = core.ops.multiclass_nms3( - bboxes, scores, rois_num, *attrs) + output, index, nms_rois_num = core.ops.multiclass_nms3(bboxes, scores, + rois_num, *attrs) if not return_index: index = None return output, nms_rois_num, index @@ -1116,10 +1115,10 @@ def matrix_nms(bboxes, if in_dygraph_mode(): attrs = ('background_label', background_label, 'score_threshold', - score_threshold, 'post_threshold', post_threshold, - 'nms_top_k', nms_top_k, 'gaussian_sigma', gaussian_sigma, - 'use_gaussian', use_gaussian, 'keep_top_k', keep_top_k, - 'normalized', normalized) + score_threshold, 'post_threshold', post_threshold, 'nms_top_k', + nms_top_k, 'gaussian_sigma', gaussian_sigma, 'use_gaussian', + use_gaussian, 'keep_top_k', keep_top_k, 'normalized', + normalized) out, index, rois_num = core.ops.matrix_nms(bboxes, scores, *attrs) if not return_index: index = None @@ -1503,9 +1502,9 @@ def generate_proposals(scores, """ if in_dygraph_mode(): assert return_rois_num, "return_rois_num should be True in dygraph mode." 
- attrs = ('pre_nms_topN', pre_nms_top_n, 'post_nms_topN', - post_nms_top_n, 'nms_thresh', nms_thresh, 'min_size', - min_size, 'eta', eta, 'pixel_offset', pixel_offset) + attrs = ('pre_nms_topN', pre_nms_top_n, 'post_nms_topN', post_nms_top_n, + 'nms_thresh', nms_thresh, 'min_size', min_size, 'eta', eta, + 'pixel_offset', pixel_offset) rpn_rois, rpn_roi_probs, rpn_rois_num = core.ops.generate_proposals_v2( scores, bbox_deltas, im_shape, anchors, variances, *attrs) return rpn_rois, rpn_roi_probs, rpn_rois_num @@ -1575,10 +1574,7 @@ def sigmoid_cross_entropy_with_logits(input, return output -def smooth_l1(input, - label, - inside_weight=None, - outside_weight=None, +def smooth_l1(input, label, inside_weight=None, outside_weight=None, sigma=None): input_new = paddle.multiply(input, inside_weight) label_new = paddle.multiply(label, inside_weight) diff --git a/paddlers/models/ppdet/modeling/post_process.py b/paddlers/models/ppdet/modeling/post_process.py index 8922f0f3..b9e556e4 100644 --- a/paddlers/models/ppdet/modeling/post_process.py +++ b/paddlers/models/ppdet/modeling/post_process.py @@ -209,7 +209,7 @@ def __call__(self, mask_out, bboxes, bbox_num, origin_shape): # TODO: support bs > 1 and mask output dtype is bool pred_result = paddle.zeros( [num_mask, origin_shape[0][0], origin_shape[0][1]], dtype='int32') - if bbox_num == 1 and bboxes[0][0] == -1: + if (len(bbox_num) == 1 and bbox_num[0] == 1) and bboxes[0][0] == -1: return pred_result # TODO: optimize chunk paste diff --git a/paddlers/models/ppdet/modeling/proposal_generator/anchor_generator.py b/paddlers/models/ppdet/modeling/proposal_generator/anchor_generator.py index 27b30307..40538a6f 100644 --- a/paddlers/models/ppdet/modeling/proposal_generator/anchor_generator.py +++ b/paddlers/models/ppdet/modeling/proposal_generator/anchor_generator.py @@ -77,8 +77,7 @@ def generate_cell_anchors(self, sizes, aspect_ratios): def _calculate_anchors(self, num_features): sizes = self._broadcast_params(self.anchor_sizes, num_features) - aspect_ratios = self._broadcast_params(self.aspect_ratios, - num_features) + aspect_ratios = self._broadcast_params(self.aspect_ratios, num_features) cell_anchors = [ self.generate_cell_anchors(s, a) for s, a in zip(sizes, aspect_ratios) @@ -94,10 +93,7 @@ def _create_grid_offsets(self, size, stride, offset): shifts_x = paddle.arange( offset * stride, grid_width * stride, step=stride, dtype='float32') shifts_y = paddle.arange( - offset * stride, - grid_height * stride, - step=stride, - dtype='float32') + offset * stride, grid_height * stride, step=stride, dtype='float32') shift_y, shift_x = paddle.meshgrid(shifts_y, shifts_x) shift_x = paddle.reshape(shift_x, [-1]) shift_y = paddle.reshape(shift_y, [-1]) diff --git a/paddlers/models/ppdet/modeling/proposal_generator/target.py b/paddlers/models/ppdet/modeling/proposal_generator/target.py index 571ae6c1..b92d0b3d 100644 --- a/paddlers/models/ppdet/modeling/proposal_generator/target.py +++ b/paddlers/models/ppdet/modeling/proposal_generator/target.py @@ -40,14 +40,12 @@ def rpn_anchor_target(anchors, anchors, gt_bbox, rpn_positive_overlap, rpn_negative_overlap, True, ignore_thresh, is_crowd_i, assign_on_cpu) # Step2: sample anchor - fg_inds, bg_inds = subsample_labels(match_labels, - rpn_batch_size_per_im, + fg_inds, bg_inds = subsample_labels(match_labels, rpn_batch_size_per_im, rpn_fg_fraction, 0, use_random) # Fill with the ignore label (-1), then set positive and negative labels labels = paddle.full(match_labels.shape, -1, dtype='int32') if bg_inds.shape[0] > 0: 
- labels = paddle.scatter(labels, bg_inds, - paddle.zeros_like(bg_inds)) + labels = paddle.scatter(labels, bg_inds, paddle.zeros_like(bg_inds)) if fg_inds.shape[0] > 0: labels = paddle.scatter(labels, fg_inds, paddle.ones_like(fg_inds)) # Step3: make output @@ -261,15 +259,14 @@ def sample_bbox(matches, paddle.ones_like(gt_classes) * num_classes, gt_classes) gt_classes = paddle.where(match_labels == -1, - paddle.ones_like(gt_classes) * -1, - gt_classes) + paddle.ones_like(gt_classes) * -1, gt_classes) if is_cascade: index = paddle.arange(matches.shape[0]) return index, gt_classes rois_per_image = int(batch_size_per_im) - fg_inds, bg_inds = subsample_labels(gt_classes, rois_per_image, - fg_fraction, num_classes, use_random) + fg_inds, bg_inds = subsample_labels(gt_classes, rois_per_image, fg_fraction, + num_classes, use_random) if fg_inds.shape[0] == 0 and bg_inds.shape[0] == 0: # fake output labeled with -1 when all boxes are neither # foreground nor background @@ -364,9 +361,7 @@ def generate_mask_target(gt_segms, rois, labels_int32, sampled_gt_inds, rasterize_polygons_within_box(new_segm[j], boxes[j], resolution)) else: - results.append( - paddle.ones( - [resolution, resolution], dtype='int32')) + results.append(paddle.ones([resolution, resolution], dtype='int32')) fg_classes = paddle.gather(labels_per_im, fg_inds) weight = paddle.ones([fg_rois.shape[0]], dtype='float32') @@ -484,8 +479,8 @@ def libra_sample_neg(max_overlaps, if floor_thr > 0: floor_set = set( np.where( - np.logical_and(max_overlaps >= 0, max_overlaps < - floor_thr))[0]) + np.logical_and(max_overlaps >= 0, max_overlaps < floor_thr)) + [0]) iou_sampling_set = set(np.where(max_overlaps >= floor_thr)[0]) elif floor_thr == 0: floor_set = set(np.where(max_overlaps == 0)[0]) @@ -614,8 +609,7 @@ def libra_sample_bbox(matches, paddle.ones_like(gt_classes) * num_classes, gt_classes) gt_classes = paddle.where(match_labels == -1, - paddle.ones_like(gt_classes) * -1, - gt_classes) + paddle.ones_like(gt_classes) * -1, gt_classes) sampled_gt_classes = paddle.gather(gt_classes, sampled_inds) return sampled_inds, sampled_gt_classes diff --git a/paddlers/models/ppdet/utils/download.py b/paddlers/models/ppdet/utils/download.py index bdacbf60..2c007870 100644 --- a/paddlers/models/ppdet/utils/download.py +++ b/paddlers/models/ppdet/utils/download.py @@ -170,9 +170,9 @@ def get_dataset_path(path, annotation, image_dir): if _dataset_exists(path, annotation, image_dir): return path - logger.info( - "Dataset {} is not valid for reason above, try searching {} or " - "downloading dataset...".format(osp.realpath(path), DATASET_HOME)) + logger.info("Dataset {} is not valid for reason above, try searching {} or " + "downloading dataset...".format( + osp.realpath(path), DATASET_HOME)) data_name = os.path.split(path.strip().lower())[-1] for name, dataset in DATASETS.items(): diff --git a/paddlers/models/ppgan/apps/__init__.py b/paddlers/models/ppgan/apps/__init__.py index 97eee072..c576f43e 100644 --- a/paddlers/models/ppgan/apps/__init__.py +++ b/paddlers/models/ppgan/apps/__init__.py @@ -17,4 +17,4 @@ from .drn_predictor import DRNPredictor from .pan_predictor import PANPredictor from .lesrcnn_predictor import LESRCNNPredictor -from .esrgan_predictor import ESRGANPredictor \ No newline at end of file +from .esrgan_predictor import ESRGANPredictor diff --git a/paddlers/models/ppgan/apps/drn_predictor.py b/paddlers/models/ppgan/apps/drn_predictor.py index 4ee67429..9c30bd5e 100644 --- a/paddlers/models/ppgan/apps/drn_predictor.py +++ 
b/paddlers/models/ppgan/apps/drn_predictor.py @@ -16,7 +16,7 @@ import numpy as np from PIL import Image -import paddle +import paddle from ppgan.models.generators import DRNGenerator from ppgan.utils.download import get_path_from_url from ppgan.utils.logger import get_logger @@ -25,21 +25,25 @@ REALSR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/models/DRNSx4.pdparams' + class DRNPredictor(BasePredictor): def __init__(self, output='output', weight_path=None): self.input = input - self.output = os.path.join(output, 'DRN') #Path to save the super-resolution results: the output dir plus a folder named after the model - self.model = DRNGenerator((2, 4)) # Instantiate the model + self.output = os.path.join(output, + 'DRN') #Path to save the super-resolution results: the output dir plus a folder named after the model + self.model = DRNGenerator((2, 4)) # Instantiate the model if weight_path is None: weight_path = get_path_from_url(REALSR_WEIGHT_URL) - state_dict = paddle.load(weight_path) #Load the weights - state_dict = state_dict['generator'] + state_dict = paddle.load(weight_path) #Load the weights + state_dict = state_dict['generator'] self.model.load_dict(state_dict) self.model.eval() + # Normalization def norm(self, img): img = np.array(img).transpose([2, 0, 1]).astype('float32') / 1.0 return img.astype('float32') + # Denormalization def denorm(self, img): img = img.transpose((1, 2, 0)) @@ -54,14 +58,15 @@ def run_image(self, img): elif isinstance(img, Image.Image): ori_img = img - img = self.norm(ori_img) #Normalize the image - x = paddle.to_tensor(img[np.newaxis, ...]) #Convert to a tensor + img = self.norm(ori_img) #Normalize the image + x = paddle.to_tensor(img[np.newaxis, ...]) #Convert to a tensor with paddle.no_grad(): - out = self.model(x)[2] # Run inference; the DRN model outputs three tensors: the first is the original low-resolution image, the second is the 2x upscale, and the third is the final result we need - + out = self.model( + x + )[2] # Run inference; the DRN model outputs three tensors: the first is the original low-resolution image, the second is the 2x upscale, and the third is the final result we need - pred_img = self.denorm(out.numpy()[0]) #Convert the tensor to a numpy array and denormalize - pred_img = Image.fromarray(pred_img) # Convert the array to an image + pred_img = self.denorm(out.numpy()[0]) #Convert the tensor to a numpy array and denormalize + pred_img = Image.fromarray(pred_img) # Convert the array to an image return pred_img #Path of the input image file @@ -70,15 +75,15 @@ def run(self, input): if not os.path.exists(self.output): os.makedirs(self.output) - pred_img = self.run_image(input) #Run prediction on the input image + pred_img = self.run_image(input) #Run prediction on the input image out_path = None if self.output: try: base_name = os.path.splitext(os.path.basename(input))[0] except: base_name = 'result' - out_path = os.path.join(self.output, base_name + '.png') #Save path - pred_img.save(out_path) #Save the output image + out_path = os.path.join(self.output, base_name + '.png') #Save path + pred_img.save(out_path) #Save the output image logger = get_logger() logger.info('Image saved to {}'.format(out_path)) diff --git a/paddlers/models/ppgan/apps/esrgan_predictor.py b/paddlers/models/ppgan/apps/esrgan_predictor.py index 12c9698d..f41dd535 100644 --- a/paddlers/models/ppgan/apps/esrgan_predictor.py +++ b/paddlers/models/ppgan/apps/esrgan_predictor.py @@ -28,6 +28,7 @@ SR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/models/esrgan_x4.pdparams' + class ESRGANPredictor(BasePredictor): def __init__(self, output='output', weight_path=None): self.input = input @@ -83,4 +84,4 @@ def run(self, input): logger = get_logger() logger.info('Image saved to {}'.format(out_path)) - return pred_img, out_path \ No newline at end of file + return pred_img, out_path diff --git a/paddlers/models/ppgan/apps/lesrcnn_predictor.py b/paddlers/models/ppgan/apps/lesrcnn_predictor.py index 651dbaa1..b0490304 100644 --- a/paddlers/models/ppgan/apps/lesrcnn_predictor.py +++ b/paddlers/models/ppgan/apps/lesrcnn_predictor.py @@ -16,7 +16,7 @@ import numpy as np from PIL import Image -import paddle +import paddle from
ppgan.models.generators import LESRCNNGenerator from ppgan.utils.download import get_path_from_url from ppgan.utils.logger import get_logger @@ -25,21 +25,25 @@ REALSR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/models/lesrcnn_x4.pdparams' + class LESRCNNPredictor(BasePredictor): def __init__(self, output='output', weight_path=None): self.input = input - self.output = os.path.join(output, 'LESRCNN') #Path to save the super-resolution results: the output dir plus a folder named after the model - self.model = LESRCNNGenerator() # Instantiate the model + self.output = os.path.join(output, + 'LESRCNN') #Path to save the super-resolution results: the output dir plus a folder named after the model + self.model = LESRCNNGenerator() # Instantiate the model if weight_path is None: weight_path = get_path_from_url(REALSR_WEIGHT_URL) - state_dict = paddle.load(weight_path) #Load the weights - state_dict = state_dict['generator'] + state_dict = paddle.load(weight_path) #Load the weights + state_dict = state_dict['generator'] self.model.load_dict(state_dict) self.model.eval() + # Normalization def norm(self, img): img = np.array(img).transpose([2, 0, 1]).astype('float32') / 255.0 return img.astype('float32') + # Denormalization def denorm(self, img): img = img.transpose((1, 2, 0)) @@ -54,14 +58,13 @@ def run_image(self, img): elif isinstance(img, Image.Image): ori_img = img - img = self.norm(ori_img) #Normalize the image - x = paddle.to_tensor(img[np.newaxis, ...]) #Convert to a tensor + img = self.norm(ori_img) #Normalize the image + x = paddle.to_tensor(img[np.newaxis, ...]) #Convert to a tensor with paddle.no_grad(): out = self.model(x) - - pred_img = self.denorm(out.numpy()[0]) #Convert the tensor to a numpy array and denormalize - pred_img = Image.fromarray(pred_img) # Convert the array to an image + pred_img = self.denorm(out.numpy()[0]) #Convert the tensor to a numpy array and denormalize + pred_img = Image.fromarray(pred_img) # Convert the array to an image return pred_img #Path of the input image file @@ -70,16 +73,16 @@ def run(self, input): if not os.path.exists(self.output): os.makedirs(self.output) - pred_img = self.run_image(input) #Run prediction on the input image + pred_img = self.run_image(input) #Run prediction on the input image out_path = None if self.output: try: base_name = os.path.splitext(os.path.basename(input))[0] except: base_name = 'result' - out_path = os.path.join(self.output, base_name + '.png') #Save path - pred_img.save(out_path) #Save the output image + out_path = os.path.join(self.output, base_name + '.png') #Save path + pred_img.save(out_path) #Save the output image logger = get_logger() logger.info('Image saved to {}'.format(out_path)) - return pred_img, out_path \ No newline at end of file + return pred_img, out_path diff --git a/paddlers/models/ppgan/apps/midas/blocks.py b/paddlers/models/ppgan/apps/midas/blocks.py index bd2c761c..bead2502 100644 --- a/paddlers/models/ppgan/apps/midas/blocks.py +++ b/paddlers/models/ppgan/apps/midas/blocks.py @@ -13,10 +13,8 @@ def _make_encoder(backbone, if backbone == "resnext101_wsl": # resnext101_wsl pretrained = _make_pretrained_resnext101_wsl(use_pretrained) - scratch = _make_scratch([256, 512, 1024, 2048], - features, - groups=groups, - expand=expand) + scratch = _make_scratch( + [256, 512, 1024, 2048], features, groups=groups, expand=expand) else: print(f"Backbone '{backbone}' not implemented") assert False @@ -36,34 +34,38 @@ def _make_scratch(in_shape, out_shape, groups=1, expand=False): out_shape3 = out_shape * 4 out_shape4 = out_shape * 8 - scratch.layer1_rn = nn.Conv2D(in_shape[0], - out_shape1, - kernel_size=3, - stride=1, - padding=1, - bias_attr=False, - groups=groups) - scratch.layer2_rn = nn.Conv2D(in_shape[1], - out_shape2, - kernel_size=3, - stride=1, - padding=1, - bias_attr=False, - groups=groups) - scratch.layer3_rn = nn.Conv2D(in_shape[2], - out_shape3, - kernel_size=3, - stride=1, - padding=1, - bias_attr=False, - groups=groups) - scratch.layer4_rn
= nn.Conv2D(in_shape[3], - out_shape4, - kernel_size=3, - stride=1, - padding=1, - bias_attr=False, - groups=groups) + scratch.layer1_rn = nn.Conv2D( + in_shape[0], + out_shape1, + kernel_size=3, + stride=1, + padding=1, + bias_attr=False, + groups=groups) + scratch.layer2_rn = nn.Conv2D( + in_shape[1], + out_shape2, + kernel_size=3, + stride=1, + padding=1, + bias_attr=False, + groups=groups) + scratch.layer3_rn = nn.Conv2D( + in_shape[2], + out_shape3, + kernel_size=3, + stride=1, + padding=1, + bias_attr=False, + groups=groups) + scratch.layer4_rn = nn.Conv2D( + in_shape[3], + out_shape4, + kernel_size=3, + stride=1, + padding=1, + bias_attr=False, + groups=groups) return scratch @@ -89,6 +91,7 @@ def _make_pretrained_resnext101_wsl(use_pretrained): class ResidualConvUnit(nn.Layer): """Residual convolution module. """ + def __init__(self, features): """Init. @@ -97,19 +100,21 @@ def __init__(self, features): """ super().__init__() - self.conv1 = nn.Conv2D(features, - features, - kernel_size=3, - stride=1, - padding=1, - bias_attr=True) - - self.conv2 = nn.Conv2D(features, - features, - kernel_size=3, - stride=1, - padding=1, - bias_attr=True) + self.conv1 = nn.Conv2D( + features, + features, + kernel_size=3, + stride=1, + padding=1, + bias_attr=True) + + self.conv2 = nn.Conv2D( + features, + features, + kernel_size=3, + stride=1, + padding=1, + bias_attr=True) self.relu = nn.ReLU() @@ -133,6 +138,7 @@ def forward(self, x): class FeatureFusionBlock(nn.Layer): """Feature fusion block. """ + def __init__(self, features): """Init. @@ -156,9 +162,7 @@ def forward(self, *xs): output += self.resConfUnit1(xs[1]) output = self.resConfUnit2(output) - output = nn.functional.interpolate(output, - scale_factor=2, - mode="bilinear", - align_corners=True) + output = nn.functional.interpolate( + output, scale_factor=2, mode="bilinear", align_corners=True) return output diff --git a/paddlers/models/ppgan/apps/midas/midas_net.py b/paddlers/models/ppgan/apps/midas/midas_net.py index ef0a00ca..b774ff66 100644 --- a/paddlers/models/ppgan/apps/midas/midas_net.py +++ b/paddlers/models/ppgan/apps/midas/midas_net.py @@ -22,6 +22,7 @@ def load(self, path): class MidasNet(BaseModel): """Network for monocular depth estimation. """ + def __init__(self, path=None, features=256, non_negative=True): """Init. @@ -47,11 +48,15 @@ def __init__(self, path=None, features=256, non_negative=True): self.scratch.refinenet1 = FeatureFusionBlock(features) output_conv = [ - nn.Conv2D(features, 128, kernel_size=3, stride=1, padding=1), - nn.Upsample(scale_factor=2, mode="bilinear"), - nn.Conv2D(128, 32, kernel_size=3, stride=1, padding=1), + nn.Conv2D( + features, 128, kernel_size=3, stride=1, padding=1), + nn.Upsample( + scale_factor=2, mode="bilinear"), + nn.Conv2D( + 128, 32, kernel_size=3, stride=1, padding=1), nn.ReLU(), - nn.Conv2D(32, 1, kernel_size=1, stride=1, padding=0), + nn.Conv2D( + 32, 1, kernel_size=1, stride=1, padding=0), nn.ReLU() if non_negative else nn.Identity(), ] if non_negative: diff --git a/paddlers/models/ppgan/apps/midas/transforms.py b/paddlers/models/ppgan/apps/midas/transforms.py index 530c552d..2a5bb984 100644 --- a/paddlers/models/ppgan/apps/midas/transforms.py +++ b/paddlers/models/ppgan/apps/midas/transforms.py @@ -8,6 +8,7 @@ class Resize(object): """Resize sample to given size (width, height). 
""" + def __init__(self, width, height, @@ -96,15 +97,15 @@ def get_size(self, width, height): f"resize_method {self.__resize_method} not implemented") if self.__resize_method == "lower_bound": - new_height = self.constrain_to_multiple_of(scale_height * height, - min_val=self.__height) - new_width = self.constrain_to_multiple_of(scale_width * width, - min_val=self.__width) + new_height = self.constrain_to_multiple_of( + scale_height * height, min_val=self.__height) + new_width = self.constrain_to_multiple_of( + scale_width * width, min_val=self.__width) elif self.__resize_method == "upper_bound": - new_height = self.constrain_to_multiple_of(scale_height * height, - max_val=self.__height) - new_width = self.constrain_to_multiple_of(scale_width * width, - max_val=self.__width) + new_height = self.constrain_to_multiple_of( + scale_height * height, max_val=self.__height) + new_width = self.constrain_to_multiple_of( + scale_width * width, max_val=self.__width) elif self.__resize_method == "minimal": new_height = self.constrain_to_multiple_of(scale_height * height) new_width = self.constrain_to_multiple_of(scale_width * width) @@ -122,26 +123,24 @@ def __call__(self, sample): sample["image"] = cv2.resize( sample["image"], (width, height), - interpolation=self.__image_interpolation_method, - ) + interpolation=self.__image_interpolation_method, ) if self.__resize_target: if "disparity" in sample: sample["disparity"] = cv2.resize( sample["disparity"], (width, height), - interpolation=cv2.INTER_NEAREST, - ) + interpolation=cv2.INTER_NEAREST, ) if "depth" in sample: - sample["depth"] = cv2.resize(sample["depth"], (width, height), - interpolation=cv2.INTER_NEAREST) + sample["depth"] = cv2.resize( + sample["depth"], (width, height), + interpolation=cv2.INTER_NEAREST) sample["mask"] = cv2.resize( sample["mask"].astype(np.float32), (width, height), - interpolation=cv2.INTER_NEAREST, - ) + interpolation=cv2.INTER_NEAREST, ) sample["mask"] = sample["mask"].astype(bool) return sample @@ -150,6 +149,7 @@ def __call__(self, sample): class NormalizeImage(object): """Normlize image by given mean and std. """ + def __init__(self, mean, std): self.__mean = mean self.__std = std @@ -163,6 +163,7 @@ def __call__(self, sample): class PrepareForNet(object): """Prepare sample for usage as network input. 
""" + def __init__(self): pass diff --git a/paddlers/models/ppgan/apps/midas/utils.py b/paddlers/models/ppgan/apps/midas/utils.py index 3054a491..5a8b939f 100644 --- a/paddlers/models/ppgan/apps/midas/utils.py +++ b/paddlers/models/ppgan/apps/midas/utils.py @@ -26,8 +26,8 @@ def write_pfm(path, image, scale=1): if len(image.shape) == 3 and image.shape[2] == 3: # color image color = True - elif (len(image.shape) == 2 - or len(image.shape) == 3 and image.shape[2] == 1): # greyscale + elif (len(image.shape) == 2 or len(image.shape) == 3 and + image.shape[2] == 1): # greyscale color = False else: raise Exception( diff --git a/paddlers/models/ppgan/apps/mpr_predictor.py b/paddlers/models/ppgan/apps/mpr_predictor.py index 40a9fc32..9c84178a 100644 --- a/paddlers/models/ppgan/apps/mpr_predictor.py +++ b/paddlers/models/ppgan/apps/mpr_predictor.py @@ -91,10 +91,10 @@ def __init__(self, def get_images(self, images_path): if os.path.isdir(images_path): return natsorted( - glob(os.path.join(images_path, '*.jpg')) + - glob(os.path.join(images_path, '*.JPG')) + - glob(os.path.join(images_path, '*.png')) + - glob(os.path.join(images_path, '*.PNG'))) + glob(os.path.join(images_path, '*.jpg')) + glob( + os.path.join(images_path, '*.JPG')) + glob( + os.path.join(images_path, '*.png')) + glob( + os.path.join(images_path, '*.PNG'))) else: return [images_path] diff --git a/paddlers/models/ppgan/apps/pan_predictor.py b/paddlers/models/ppgan/apps/pan_predictor.py index ddb6b26d..27cb9263 100644 --- a/paddlers/models/ppgan/apps/pan_predictor.py +++ b/paddlers/models/ppgan/apps/pan_predictor.py @@ -16,7 +16,7 @@ import numpy as np from PIL import Image -import paddle +import paddle from ppgan.models.generators import PAN from ppgan.utils.download import get_path_from_url from ppgan.utils.logger import get_logger @@ -25,21 +25,25 @@ REALSR_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/models/pan_x4.pdparams' + class PANPredictor(BasePredictor): def __init__(self, output='output', weight_path=None): self.input = input - self.output = os.path.join(output, 'PAN') #定义超分的结果保存的路径,为output路径+模型名所在文件夹 - self.model = PAN(3, 3, 40, 24, 16) # 实例化模型 + self.output = os.path.join(output, + 'PAN') #定义超分的结果保存的路径,为output路径+模型名所在文件夹 + self.model = PAN(3, 3, 40, 24, 16) # 实例化模型 if weight_path is None: weight_path = get_path_from_url(REALSR_WEIGHT_URL) - state_dict = paddle.load(weight_path) #加载权重 - state_dict = state_dict['generator'] + state_dict = paddle.load(weight_path) #加载权重 + state_dict = state_dict['generator'] self.model.load_dict(state_dict) self.model.eval() + # 标准化 def norm(self, img): img = np.array(img).transpose([2, 0, 1]).astype('float32') / 255.0 return img.astype('float32') + # 去标准化 def denorm(self, img): img = img.transpose((1, 2, 0)) @@ -54,14 +58,13 @@ def run_image(self, img): elif isinstance(img, Image.Image): ori_img = img - img = self.norm(ori_img) #图像标准化 - x = paddle.to_tensor(img[np.newaxis, ...]) #转成tensor + img = self.norm(ori_img) #图像标准化 + x = paddle.to_tensor(img[np.newaxis, ...]) #转成tensor with paddle.no_grad(): out = self.model(x) - - pred_img = self.denorm(out.numpy()[0]) #tensor转成numpy的array并去标准化 - pred_img = Image.fromarray(pred_img) # array转图像 + pred_img = self.denorm(out.numpy()[0]) #tensor转成numpy的array并去标准化 + pred_img = Image.fromarray(pred_img) # array转图像 return pred_img #输入图像文件路径 @@ -70,17 +73,16 @@ def run(self, input): if not os.path.exists(self.output): os.makedirs(self.output) - pred_img = self.run_image(input) #对输入的图片进行预测 + pred_img = self.run_image(input) #对输入的图片进行预测 out_path = None if 
self.output: try: base_name = os.path.splitext(os.path.basename(input))[0] except: base_name = 'result' - out_path = os.path.join(self.output, base_name + '.png') #Save path - pred_img.save(out_path) #Save the output image + out_path = os.path.join(self.output, base_name + '.png') #Save path + pred_img.save(out_path) #Save the output image logger = get_logger() logger.info('Image saved to {}'.format(out_path)) return pred_img, out_path - diff --git a/paddlers/models/ppgan/datasets/animeganv2_dataset.py b/paddlers/models/ppgan/datasets/animeganv2_dataset.py index 7f70ef8b..57a293e7 100644 --- a/paddlers/models/ppgan/datasets/animeganv2_dataset.py +++ b/paddlers/models/ppgan/datasets/animeganv2_dataset.py @@ -27,6 +27,7 @@ class AnimeGANV2Dataset(paddle.io.Dataset): """ """ + def __init__(self, dataroot, style, @@ -50,18 +51,14 @@ def __init__(self, self.anime_root = os.path.join(self.root, f'{self.style}', 'style') self.smooth_root = os.path.join(self.root, f'{self.style}', 'smooth') - self.real = ImageFolder(self.real_root, - transform=self.transform_real, - loader=self.loader) - self.anime = ImageFolder(self.anime_root, - transform=self.transform_anime, - loader=self.loader) - self.anime_gray = ImageFolder(self.anime_root, - transform=self.transform_gray, - loader=self.loader) - self.smooth_gray = ImageFolder(self.smooth_root, - transform=self.transform_gray, - loader=self.loader) + self.real = ImageFolder( + self.real_root, transform=self.transform_real, loader=self.loader) + self.anime = ImageFolder( + self.anime_root, transform=self.transform_anime, loader=self.loader) + self.anime_gray = ImageFolder( + self.anime_root, transform=self.transform_gray, loader=self.loader) + self.smooth_gray = ImageFolder( + self.smooth_root, transform=self.transform_gray, loader=self.loader) self.sizes = [ len(fold) for fold in [self.real, self.anime, self.smooth_gray] ] @@ -70,8 +67,9 @@ def __init__(self, @staticmethod def loader(path): - return cv2.cvtColor(cv2.imread(path, flags=cv2.IMREAD_COLOR), - cv2.COLOR_BGR2RGB) + return cv2.cvtColor( + cv2.imread( + path, flags=cv2.IMREAD_COLOR), cv2.COLOR_BGR2RGB) def reshuffle(self): indexs = [] diff --git a/paddlers/models/ppgan/datasets/base_dataset.py b/paddlers/models/ppgan/datasets/base_dataset.py index 229c8e23..c5535441 100644 --- a/paddlers/models/ppgan/datasets/base_dataset.py +++ b/paddlers/models/ppgan/datasets/base_dataset.py @@ -57,9 +57,8 @@ def _scandir(dir_path, suffix, recursive): yield rel_path else: if recursive: - yield from _scandir(entry.path, - suffix=suffix, - recursive=recursive) + yield from _scandir( + entry.path, suffix=suffix, recursive=recursive) else: continue @@ -79,6 +78,7 @@ class BaseDataset(Dataset, metaclass=ABCMeta): preprocess (list[dict]): A sequence of data preprocess config.
""" + def __init__(self, preprocess=None): super(BaseDataset, self).__init__() diff --git a/paddlers/models/ppgan/datasets/base_sr_dataset.py b/paddlers/models/ppgan/datasets/base_sr_dataset.py index 306ad9ad..43352bc5 100644 --- a/paddlers/models/ppgan/datasets/base_sr_dataset.py +++ b/paddlers/models/ppgan/datasets/base_sr_dataset.py @@ -23,6 +23,7 @@ @DATASETS.register() class SRDataset(BaseDataset): """Base super resulotion dataset for image restoration.""" + def __init__(self, lq_folder, gt_folder, diff --git a/paddlers/models/ppgan/datasets/builder.py b/paddlers/models/ppgan/datasets/builder.py index 9ee1f41c..cba6ee41 100644 --- a/paddlers/models/ppgan/datasets/builder.py +++ b/paddlers/models/ppgan/datasets/builder.py @@ -48,21 +48,24 @@ def build_dataloader(cfg, is_train=True, distributed=True): dataset = build_dataset(cfg_) if distributed: - sampler = DistributedBatchSampler(dataset, - batch_size=batch_size, - shuffle=True if is_train else False, - drop_last=True if is_train else False) + sampler = DistributedBatchSampler( + dataset, + batch_size=batch_size, + shuffle=True if is_train else False, + drop_last=True if is_train else False) - dataloader = paddle.io.DataLoader(dataset, - batch_sampler=sampler, - num_workers=num_workers, - use_shared_memory=use_shared_memory) + dataloader = paddle.io.DataLoader( + dataset, + batch_sampler=sampler, + num_workers=num_workers, + use_shared_memory=use_shared_memory) else: - dataloader = paddle.io.DataLoader(dataset, - batch_size=batch_size, - shuffle=True if is_train else False, - drop_last=True if is_train else False, - use_shared_memory=use_shared_memory, - num_workers=num_workers) + dataloader = paddle.io.DataLoader( + dataset, + batch_size=batch_size, + shuffle=True if is_train else False, + drop_last=True if is_train else False, + use_shared_memory=use_shared_memory, + num_workers=num_workers) return dataloader diff --git a/paddlers/models/ppgan/datasets/common_vision_dataset.py b/paddlers/models/ppgan/datasets/common_vision_dataset.py index 8b039265..5996add5 100644 --- a/paddlers/models/ppgan/datasets/common_vision_dataset.py +++ b/paddlers/models/ppgan/datasets/common_vision_dataset.py @@ -25,6 +25,7 @@ class CommonVisionDataset(paddle.io.Dataset): """ Dataset for using paddle vision default datasets, such as mnist, flowers. 
""" + def __init__(self, dataset_name, transforms=None, diff --git a/paddlers/models/ppgan/datasets/firstorder_dataset.py b/paddlers/models/ppgan/datasets/firstorder_dataset.py index 31749b4f..5660b781 100644 --- a/paddlers/models/ppgan/datasets/firstorder_dataset.py +++ b/paddlers/models/ppgan/datasets/firstorder_dataset.py @@ -48,13 +48,13 @@ def __init__(self, **cfg): file_idx_set = list(file_idx_set) if len(file_idx_set) != 0: if POOL_SIZE == 0: - for idx in tqdm.tqdm(file_idx_set, - desc='Extracting frames'): + for idx in tqdm.tqdm( + file_idx_set, desc='Extracting frames'): _ = self.frameDataset[idx] else: # multiprocessing - bar = tqdm.tqdm(total=len(file_idx_set), - desc='Extracting frames') + bar = tqdm.tqdm( + total=len(file_idx_set), desc='Extracting frames') with Pool(POOL_SIZE) as pl: _p = 0 while _p <= len(file_idx_set) - 1: @@ -90,10 +90,10 @@ def read_video(name: Path, frame_shape=tuple([256, 256, 3]), saveto='folder'): - folder with videos """ if name.is_dir(): - frames = sorted(name.iterdir(), - key=lambda x: int(x.with_suffix('').name)) - video_array = np.array([imread(path) for path in frames], - dtype='float32') + frames = sorted( + name.iterdir(), key=lambda x: int(x.with_suffix('').name)) + video_array = np.array( + [imread(path) for path in frames], dtype='float32') return video_array elif name.suffix.lower() in ['.gif', '.mp4', '.mov']: try: @@ -123,7 +123,8 @@ def read_video(name: Path, frame_shape=tuple([256, 256, 3]), saveto='folder'): except FileExistsError: pass for idx, img in enumerate(video_array_reshape): - cv2.imwrite(str(sub_dir.joinpath('%i.png' % idx)), img[:,:,[2,1,0]]) + cv2.imwrite( + str(sub_dir.joinpath('%i.png' % idx)), img[:, :, [2, 1, 0]]) name.unlink() return video_array_reshape else: @@ -138,6 +139,7 @@ class FramesDataset(Dataset): - folder with all frames FramesDataset[i]: obtain sample from i-th video in self.videos """ + def __init__(self, cfg): self.root_dir = Path(cfg['dataroot']) self.videos = None @@ -161,8 +163,8 @@ def __init__(self, cfg): else: train_videos = list(self.root_dir.joinpath('train').iterdir()) test_videos = list(self.root_dir.joinpath('test').iterdir()) - self.root_dir = self.root_dir.joinpath( - 'train' if self.is_train else 'test') + self.root_dir = self.root_dir.joinpath('train' + if self.is_train else 'test') if self.is_train: self.videos = train_videos @@ -184,23 +186,22 @@ def __getitem__(self, idx): path = self.videos[idx] video_name = path.name if self.is_train and path.is_dir(): - frames = sorted(path.iterdir(), - key=lambda x: int(x.with_suffix('').name)) + frames = sorted( + path.iterdir(), key=lambda x: int(x.with_suffix('').name)) num_frames = len(frames) frame_idx = np.sort( - np.random.choice(num_frames, replace=True, size=2)) + np.random.choice( + num_frames, replace=True, size=2)) video_array = [imread(str(frames[idx])) for idx in frame_idx] else: if self.create_frames_folder: - video_array = read_video(path, - frame_shape=self.frame_shape, - saveto='folder') + video_array = read_video( + path, frame_shape=self.frame_shape, saveto='folder') self.videos[idx] = path.with_suffix( '') # rename /xx/xx/xx.gif -> /xx/xx/xx else: - video_array = read_video(path, - frame_shape=self.frame_shape, - saveto=None) + video_array = read_video( + path, frame_shape=self.frame_shape, saveto=None) num_frames = len(video_array) frame_idx = np.sort( np.random.choice( @@ -220,13 +221,14 @@ def __getitem__(self, idx): if self.is_train: if self.transform is not None: #modify t = self.transform(tuple(video_array)) - 
out['driving'] = t[0].transpose(2, 0, 1).astype( - np.float32) / 255.0 - out['source'] = t[1].transpose(2, 0, 1).astype( - np.float32) / 255.0 + out['driving'] = t[0].transpose(2, 0, + 1).astype(np.float32) / 255.0 + out['source'] = t[1].transpose(2, 0, + 1).astype(np.float32) / 255.0 else: - source = np.array(video_array[0], - dtype='float32') / 255.0 # shape is [H, W, C] + source = np.array( + video_array[0], + dtype='float32') / 255.0 # shape is [H, W, C] driving = np.array( video_array[1], dtype='float32') / 255.0 # shape is [H, W, C] @@ -250,6 +252,7 @@ class DatasetRepeater(Dataset): """ Pass several times over the same dataset for better i/o performance """ + def __init__(self, dataset, num_repeats=100): self.dataset = dataset self.num_repeats = num_repeats diff --git a/paddlers/models/ppgan/datasets/image_folder.py b/paddlers/models/ppgan/datasets/image_folder.py index a9eec8aa..12d6a6f1 100644 --- a/paddlers/models/ppgan/datasets/image_folder.py +++ b/paddlers/models/ppgan/datasets/image_folder.py @@ -70,8 +70,8 @@ def __init__(self, imgs = make_dataset(root) if len(imgs) == 0: raise (RuntimeError("Found 0 images in: " + root + "\n" - "Supported image extensions are: " + - ",".join(IMG_EXTENSIONS))) + "Supported image extensions are: " + ",".join( + IMG_EXTENSIONS))) self.root = root self.imgs = imgs diff --git a/paddlers/models/ppgan/datasets/paired_dataset.py b/paddlers/models/ppgan/datasets/paired_dataset.py index 503d9202..70a917e2 100644 --- a/paddlers/models/ppgan/datasets/paired_dataset.py +++ b/paddlers/models/ppgan/datasets/paired_dataset.py @@ -20,6 +20,7 @@ class PairedDataset(BaseDataset): """A dataset class for paired image dataset. """ + def __init__(self, dataroot, preprocess): """Initialize this dataset class. diff --git a/paddlers/models/ppgan/datasets/preprocess/__init__.py b/paddlers/models/ppgan/datasets/preprocess/__init__.py index 1712224e..ee1aa3e3 100644 --- a/paddlers/models/ppgan/datasets/preprocess/__init__.py +++ b/paddlers/models/ppgan/datasets/preprocess/__init__.py @@ -1,8 +1,7 @@ from .io import LoadImageFromFile, ReadImageSequence, GetNeighboringFramesIdx -from .transforms import (PairedRandomCrop, PairedRandomHorizontalFlip, - PairedRandomVerticalFlip, PairedRandomTransposeHW, - SRPairedRandomCrop, SplitPairedImage, SRNoise, - NormalizeSequence, MirrorVideoSequence, - TransposeSequence) +from .transforms import ( + PairedRandomCrop, PairedRandomHorizontalFlip, PairedRandomVerticalFlip, + PairedRandomTransposeHW, SRPairedRandomCrop, SplitPairedImage, SRNoise, + NormalizeSequence, MirrorVideoSequence, TransposeSequence) from .builder import build_preprocess diff --git a/paddlers/models/ppgan/datasets/preprocess/builder.py b/paddlers/models/ppgan/datasets/preprocess/builder.py index bb6c7dec..eaf499d7 100644 --- a/paddlers/models/ppgan/datasets/preprocess/builder.py +++ b/paddlers/models/ppgan/datasets/preprocess/builder.py @@ -35,6 +35,7 @@ class Compose(object): object will call each given :attr:`transforms` sequencely. """ + def __init__(self, functions): self.functions = functions diff --git a/paddlers/models/ppgan/datasets/preprocess/io.py b/paddlers/models/ppgan/datasets/preprocess/io.py index d8ce34e4..bd35a9dd 100644 --- a/paddlers/models/ppgan/datasets/preprocess/io.py +++ b/paddlers/models/ppgan/datasets/preprocess/io.py @@ -18,6 +18,7 @@ class LoadImageFromFile(object): `datas` dict with name of `f'ori_{key}'`. Default: False. kwargs (dict): Args for file client. 
""" + def __init__(self, key='image', flag=-1, @@ -74,6 +75,7 @@ class ReadImageSequence(LoadImageFromFile): `datas` dict with name of `f'ori_{key}'`. Default: False. kwargs (dict): Args for file client. """ + def __call__(self, datas): """Call function. @@ -130,6 +132,7 @@ class GetNeighboringFramesIdx: sequence. Default: 0. filename_tmpl (str): Template for file name. Default: '{:08d}.png'. """ + def __init__(self, interval_list, start_idx=0, filename_tmpl='{:08d}.png'): self.interval_list = interval_list self.filename_tmpl = filename_tmpl diff --git a/paddlers/models/ppgan/datasets/preprocess/transforms.py b/paddlers/models/ppgan/datasets/preprocess/transforms.py index 3064bb39..00a7128a 100644 --- a/paddlers/models/ppgan/datasets/preprocess/transforms.py +++ b/paddlers/models/ppgan/datasets/preprocess/transforms.py @@ -61,8 +61,8 @@ def __call__(self, datas): data = tuple(data) for transform in self.transforms: data = transform(data) - if hasattr(transform, 'params') and isinstance( - transform.params, dict): + if hasattr(transform, 'params') and isinstance(transform.params, + dict): datas.update(transform.params) if len(self.input_keys) > 1: @@ -176,6 +176,7 @@ class PairedRandomTransposeHW(T.BaseTransform): prob (float): The propability to transpose the images. keys (list[str]): The images to be transposed. """ + def __init__(self, prob=0.5, keys=None): self.keys = keys self.prob = prob @@ -220,6 +221,7 @@ class TransposeSequence(T.Transpose): fake_img_seq = transform(fake_img_seq) """ + def _apply_image(self, img): if isinstance(img, list): imgs = [] @@ -277,6 +279,7 @@ class NormalizeSequence(T.Normalize): fake_img_seq = normalize_seq(fake_img_seq) """ + def _apply_image(self, img): if isinstance(img, list): imgs = [ @@ -302,6 +305,7 @@ class SRPairedRandomCrop(T.BaseTransform): scale (int): model upscale factor. gt_patch_size (int): cropped gt patch size. """ + def __init__(self, scale, gt_patch_size, scale_list=False, keys=None): self.gt_patch_size = gt_patch_size self.scale = scale @@ -339,16 +343,16 @@ def __call__(self, inputs): ] top_gt, left_gt = int(top * scale), int(left * scale) gt = [ - v[top_gt:top_gt + self.gt_patch_size, - left_gt:left_gt + self.gt_patch_size, ...] for v in gt + v[top_gt:top_gt + self.gt_patch_size, left_gt:left_gt + + self.gt_patch_size, ...] for v in gt ] else: # crop lq patch lq = lq[top:top + lq_patch_size, left:left + lq_patch_size, ...] # crop corresponding gt patch top_gt, left_gt = int(top * scale), int(left * scale) - gt = gt[top_gt:top_gt + self.gt_patch_size, - left_gt:left_gt + self.gt_patch_size, ...] + gt = gt[top_gt:top_gt + self.gt_patch_size, left_gt:left_gt + + self.gt_patch_size, ...] if self.scale_list and self.scale == 4: lqx2 = F.resize(gt, (lq_patch_size * 2, lq_patch_size * 2), @@ -368,14 +372,14 @@ class SRNoise(T.BaseTransform): noise_path (str): directory of noise image. size (int): cropped noise patch size. """ + def __init__(self, noise_path, size, keys=None): self.noise_path = noise_path self.noise_imgs = sorted(glob.glob(noise_path + '*.png')) self.size = size self.keys = keys self.transform = T.Compose([ - T.RandomCrop(size), - T.Transpose(), + T.RandomCrop(size), T.Transpose(), T.Normalize([0., 0., 0.], [255., 255., 255.]) ]) @@ -396,6 +400,7 @@ class RandomResizedCropProb(T.RandomResizedCrop): prob (float): probabilty of using random-resized cropping. size (int): cropped size. 
""" + def __init__(self, prob, size, scale, ratio, interpolation, keys=None): super().__init__(size, scale, ratio, interpolation) self.prob = prob @@ -480,21 +485,14 @@ def _apply_image(self, image): @TRANSFORMS.register() class PairedColorJitter(T.BaseTransform): - def __init__(self, - brightness=0, - contrast=0, - saturation=0, - hue=0, + def __init__(self, brightness=0, contrast=0, saturation=0, hue=0, keys=None): super().__init__(keys=keys) self.brightness = T.transforms._check_input(brightness, 'brightness') self.contrast = T.transforms._check_input(contrast, 'contrast') self.saturation = T.transforms._check_input(saturation, 'saturation') - self.hue = T.transforms._check_input(hue, - 'hue', - center=0, - bound=(-0.5, 0.5), - clip_first_on_zero=False) + self.hue = T.transforms._check_input( + hue, 'hue', center=0, bound=(-0.5, 0.5), clip_first_on_zero=False) def _get_params(self, input): """Get a randomized transform to be applied on image. @@ -545,6 +543,7 @@ class MirrorVideoSequence: Args: keys (list[str]): The frame lists to be extended. """ + def __init__(self, keys=None): self.keys = keys diff --git a/paddlers/models/ppgan/datasets/single_dataset.py b/paddlers/models/ppgan/datasets/single_dataset.py index 98661567..29f4259c 100644 --- a/paddlers/models/ppgan/datasets/single_dataset.py +++ b/paddlers/models/ppgan/datasets/single_dataset.py @@ -20,6 +20,7 @@ class SingleDataset(BaseDataset): """ """ + def __init__(self, dataroot, preprocess): """Initialize single dataset class. diff --git a/paddlers/models/ppgan/datasets/starganv2_dataset.py b/paddlers/models/ppgan/datasets/starganv2_dataset.py index 0985b13c..39fa232a 100644 --- a/paddlers/models/ppgan/datasets/starganv2_dataset.py +++ b/paddlers/models/ppgan/datasets/starganv2_dataset.py @@ -107,6 +107,7 @@ def __len__(self): class StarGANv2Dataset(BaseDataset): """ """ + def __init__(self, dataroot, is_train, preprocess, test_count=0): """Initialize single dataset class. @@ -125,10 +126,10 @@ def __init__(self, dataroot, is_train, preprocess, test_count=0): else: files = os.listdir(self.dataroot) if 'src' in files and 'ref' in files: - self.src_loader = ImageFolder(os.path.join( - self.dataroot, 'src')) - self.ref_loader = ImageFolder(os.path.join( - self.dataroot, 'ref')) + self.src_loader = ImageFolder( + os.path.join(self.dataroot, 'src')) + self.ref_loader = ImageFolder( + os.path.join(self.dataroot, 'ref')) else: self.src_loader = ImageFolder(self.dataroot) self.ref_loader = ImageFolder(self.dataroot) diff --git a/paddlers/models/ppgan/datasets/unpaired_dataset.py b/paddlers/models/ppgan/datasets/unpaired_dataset.py index b55cb7c7..a49767aa 100644 --- a/paddlers/models/ppgan/datasets/unpaired_dataset.py +++ b/paddlers/models/ppgan/datasets/unpaired_dataset.py @@ -23,6 +23,7 @@ class UnpairedDataset(BaseDataset): """ """ + def __init__(self, dataroot_a, dataroot_b, max_size, is_train, preprocess): """Initialize unpaired dataset class. 
diff --git a/paddlers/models/ppgan/engine/trainer.py b/paddlers/models/ppgan/engine/trainer.py index 9184e641..74ecf21e 100644 --- a/paddlers/models/ppgan/engine/trainer.py +++ b/paddlers/models/ppgan/engine/trainer.py @@ -29,6 +29,7 @@ from ..utils.timer import TimeAverager from ..utils.profiler import add_profiler_step + class IterLoader: def __init__(self, dataloader): self._dataloader = dataloader @@ -71,6 +72,7 @@ class Trainer: # | || # save checkpoint (model.nets) \/ """ + def __init__(self, cfg): # base config self.logger = logging.getLogger(__name__) @@ -220,8 +222,8 @@ def train(self): def test(self): if not hasattr(self, 'test_dataloader'): - self.test_dataloader = build_dataloader(self.cfg.dataset.test, - is_train=False) + self.test_dataloader = build_dataloader( + self.cfg.dataset.test, is_train=False) iter_loader = IterLoader(self.test_dataloader) if self.max_eval_steps is None: self.max_eval_steps = len(self.test_dataloader) @@ -235,9 +237,8 @@ def test(self): for i in range(self.max_eval_steps): if self.max_eval_steps < self.log_interval or i % self.log_interval == 0: - self.logger.info('Test iter: [%d/%d]' % - (i * self.world_size, - self.max_eval_steps * self.world_size)) + self.logger.info('Test iter: [%d/%d]' % ( + i * self.world_size, self.max_eval_steps * self.world_size)) data = next(iter_loader) self.model.setup_input(data) @@ -248,8 +249,8 @@ def test(self): current_paths = self.model.get_image_paths() current_visuals = self.model.get_current_visuals() - if len(current_visuals) > 0 and list( - current_visuals.values())[0].shape == 4: + if len(current_visuals) > 0 and list(current_visuals.values())[ + 0].shape == 4: num_samples = list(current_visuals.values())[0].shape[0] else: num_samples = 1 @@ -267,10 +268,11 @@ def test(self): else: visual_results.update({name: img_tensor}) - self.visual('visual_test', - visual_results=visual_results, - step=self.batch_id, - is_save_image=True) + self.visual( + 'visual_test', + visual_results=visual_results, + step=self.batch_id, + is_save_image=True) if self.metrics: for metric_name, metric in self.metrics.items(): @@ -398,9 +400,9 @@ def save(self, epoch, name='checkpoint', keep=1): try: if self.by_epoch: checkpoint_name_to_be_removed = os.path.join( - self.output_dir, 'epoch_%s_%s.pdparams' % - ((epoch - keep * self.weight_interval) // - self.iters_per_epoch, name)) + self.output_dir, 'epoch_%s_%s.pdparams' % ( + (epoch - keep * self.weight_interval) // + self.iters_per_epoch, name)) else: checkpoint_name_to_be_removed = os.path.join( self.output_dir, 'iter_%s_%s.pdparams' % @@ -432,8 +434,8 @@ def load(self, weight_path): for net_name, net in self.model.nets.items(): if net_name in state_dicts: net.set_state_dict(state_dicts[net_name]) - self.logger.info( - 'Loaded pretrained weight for net {}'.format(net_name)) + self.logger.info('Loaded pretrained weight for net {}'.format( + net_name)) else: self.logger.warning( 'Can not find state dict of net {}. 
Skip load pretrained weight for net {}' diff --git a/paddlers/models/ppgan/faceutils/dlibutils/face_align.py b/paddlers/models/ppgan/faceutils/dlibutils/face_align.py index f18c5d9f..a03a4bda 100644 --- a/paddlers/models/ppgan/faceutils/dlibutils/face_align.py +++ b/paddlers/models/ppgan/faceutils/dlibutils/face_align.py @@ -56,7 +56,8 @@ def align(image, lms): # rotation angle left_eye_corner = lms[36] right_eye_corner = lms[45] - radian = np.arctan((left_eye_corner[1] - right_eye_corner[1]) / (left_eye_corner[0] - right_eye_corner[0])) + radian = np.arctan((left_eye_corner[1] - right_eye_corner[1]) / + (left_eye_corner[0] - right_eye_corner[0])) # image size after rotating height, width, _ = image.shape @@ -73,7 +74,8 @@ def align(image, lms): M = np.array([[cos, sin, (1 - cos) * width / 2. - sin * height / 2. + Tx], [-sin, cos, sin * width / 2. + (1 - cos) * height / 2. + Ty]]) - image_rotate = cv2.warpAffine(image, M, (new_w, new_h), borderValue=(255, 255, 255)) + image_rotate = cv2.warpAffine( + image, M, (new_w, new_h), borderValue=(255, 255, 255)) landmarks = np.concatenate([lms, np.ones((lms.shape[0], 1))], axis=1) landmarks_rotate = np.dot(M, landmarks.T).T @@ -99,7 +101,8 @@ def crop(image, lms): top -= ((right - left) - (bottom - top)) // 2 bottom = top + (right - left) - image_crop = np.ones((bottom - top + 1, right - left + 1, 3), np.uint8) * 255 + image_crop = np.ones((bottom - top + 1, right - left + 1, 3), + np.uint8) * 255 h, w = image.shape[:2] left_white = max(0, -left) @@ -111,5 +114,6 @@ def crop(image, lms): bottom = min(bottom, h - 1) bottom_white = top_white + (bottom - top) - image_crop[top_white:bottom_white+1, left_white:right_white+1] = image[top:bottom+1, left:right+1].copy() + image_crop[top_white:bottom_white + 1, left_white:right_white + 1] = image[ + top:bottom + 1, left:right + 1].copy() return image_crop diff --git a/paddlers/models/ppgan/faceutils/face_detection/detection/blazeface/detect.py b/paddlers/models/ppgan/faceutils/face_detection/detection/blazeface/detect.py index f4f2b89d..b2fc020e 100644 --- a/paddlers/models/ppgan/faceutils/face_detection/detection/blazeface/detect.py +++ b/paddlers/models/ppgan/faceutils/face_detection/detection/blazeface/detect.py @@ -65,7 +65,8 @@ def batch_detect(net, img_batch): ymax = pred[:, 2:3] locs = np.concatenate((xmin, ymin, xmax, ymax), axis=1) bboxlists.append( - np.concatenate((locs * orig_size + shift, scores), axis=1)) + np.concatenate( + (locs * orig_size + shift, scores), axis=1)) return bboxlists diff --git a/paddlers/models/ppgan/faceutils/face_detection/detection/blazeface/net_blazeface.py b/paddlers/models/ppgan/faceutils/face_detection/detection/blazeface/net_blazeface.py index 9e182708..fcdb4856 100644 --- a/paddlers/models/ppgan/faceutils/face_detection/detection/blazeface/net_blazeface.py +++ b/paddlers/models/ppgan/faceutils/face_detection/detection/blazeface/net_blazeface.py @@ -22,18 +22,19 @@ def __init__(self, in_channels, out_channels, kernel_size=3, stride=1): padding = (kernel_size - 1) // 2 self.convs = nn.Sequential( - nn.Conv2D(in_channels=in_channels, - out_channels=in_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - groups=in_channels), - nn.Conv2D(in_channels=in_channels, - out_channels=out_channels, - kernel_size=1, - stride=1, - padding=0), - ) + nn.Conv2D( + in_channels=in_channels, + out_channels=in_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + groups=in_channels), + nn.Conv2D( + in_channels=in_channels, + 
out_channels=out_channels, + kernel_size=1, + stride=1, + padding=0), ) self.act = nn.ReLU() @@ -52,6 +53,7 @@ def forward(self, x): class BlazeFace(nn.Layer): """The BlazeFace face detection model. """ + def __init__(self): super(BlazeFace, self).__init__() @@ -70,32 +72,34 @@ def __init__(self): def _define_layers(self): self.backbone1 = nn.Sequential( - nn.Conv2D(in_channels=3, - out_channels=24, - kernel_size=5, - stride=2, - padding=0), + nn.Conv2D( + in_channels=3, + out_channels=24, + kernel_size=5, + stride=2, + padding=0), nn.ReLU(), BlazeBlock(24, 24), BlazeBlock(24, 28), - BlazeBlock(28, 32, stride=2), + BlazeBlock( + 28, 32, stride=2), BlazeBlock(32, 36), BlazeBlock(36, 42), - BlazeBlock(42, 48, stride=2), + BlazeBlock( + 42, 48, stride=2), BlazeBlock(48, 56), BlazeBlock(56, 64), BlazeBlock(64, 72), BlazeBlock(72, 80), - BlazeBlock(80, 88), - ) + BlazeBlock(80, 88), ) self.backbone2 = nn.Sequential( - BlazeBlock(88, 96, stride=2), - BlazeBlock(96, 96), + BlazeBlock( + 88, 96, stride=2), BlazeBlock(96, 96), BlazeBlock(96, 96), BlazeBlock(96, 96), - ) + BlazeBlock(96, 96), ) self.classifier_8 = nn.Conv2D(88, 2, 1) self.classifier_16 = nn.Conv2D(96, 6, 1) @@ -240,8 +244,8 @@ def _tensors_to_detections(self, raw_box_tensor, raw_score_tensor, anchors): output_detections = [] for i in range(raw_box_tensor.shape[0]): boxes = paddle.to_tensor(detection_boxes[i, mask[i]]) - scores = paddle.to_tensor( - detection_scores[i, mask[i]]).unsqueeze(axis=-1) + scores = paddle.to_tensor(detection_scores[i, mask[i]]).unsqueeze( + axis=-1) output_detections.append(paddle.concat((boxes, scores), axis=-1)) return output_detections @@ -296,8 +300,8 @@ def _weighted_non_max_suppression(self, detections): first_box = detection[:4] other_boxes = detections[remaining, :4] - ious = overlap_similarity(paddle.to_tensor(first_box), - paddle.to_tensor(other_boxes)) + ious = overlap_similarity( + paddle.to_tensor(first_box), paddle.to_tensor(other_boxes)) mask = ious > self.min_suppression_threshold mask = mask.numpy() diff --git a/paddlers/models/ppgan/faceutils/face_detection/detection/sfd/detect.py b/paddlers/models/ppgan/faceutils/face_detection/detection/sfd/detect.py index b5493cc6..8b6a3186 100644 --- a/paddlers/models/ppgan/faceutils/face_detection/detection/sfd/detect.py +++ b/paddlers/models/ppgan/faceutils/face_detection/detection/sfd/detect.py @@ -95,8 +95,8 @@ def batch_detect(net, imgs): box = batch_decode(paddle.to_tensor(loc), priors, variances) box = box[:, 0] * 1.0 bboxlist.append( - paddle.concat([box, paddle.to_tensor(score).unsqueeze(1)], - 1).numpy()) + paddle.concat([box, paddle.to_tensor(score).unsqueeze(1)], 1) + .numpy()) bboxlist = np.array(bboxlist) if 0 == len(bboxlist): bboxlist = np.zeros((1, BB, 5)) diff --git a/paddlers/models/ppgan/faceutils/face_detection/detection/sfd/net_s3fd.py b/paddlers/models/ppgan/faceutils/face_detection/detection/sfd/net_s3fd.py index aa5a7db7..3fcd4ad3 100644 --- a/paddlers/models/ppgan/faceutils/face_detection/detection/sfd/net_s3fd.py +++ b/paddlers/models/ppgan/faceutils/face_detection/detection/sfd/net_s3fd.py @@ -23,8 +23,8 @@ def __init__(self, n_channels, scale=1.0): self.n_channels = n_channels self.scale = scale self.eps = 1e-10 - self.weight = paddle.create_parameter(shape=[self.n_channels], - dtype='float32') + self.weight = paddle.create_parameter( + shape=[self.n_channels], dtype='float32') self.weight.set_value(paddle.zeros([self.n_channels]) + self.scale) def forward(self, x): @@ -67,67 +67,31 @@ def __init__(self): self.conv4_3_norm 
= L2Norm(512, scale=8) self.conv5_3_norm = L2Norm(512, scale=5) - self.conv3_3_norm_mbox_conf = nn.Conv2D(256, - 4, - kernel_size=3, - stride=1, - padding=1) - self.conv3_3_norm_mbox_loc = nn.Conv2D(256, - 4, - kernel_size=3, - stride=1, - padding=1) - self.conv4_3_norm_mbox_conf = nn.Conv2D(512, - 2, - kernel_size=3, - stride=1, - padding=1) - self.conv4_3_norm_mbox_loc = nn.Conv2D(512, - 4, - kernel_size=3, - stride=1, - padding=1) - self.conv5_3_norm_mbox_conf = nn.Conv2D(512, - 2, - kernel_size=3, - stride=1, - padding=1) - self.conv5_3_norm_mbox_loc = nn.Conv2D(512, - 4, - kernel_size=3, - stride=1, - padding=1) - - self.fc7_mbox_conf = nn.Conv2D(1024, - 2, - kernel_size=3, - stride=1, - padding=1) - self.fc7_mbox_loc = nn.Conv2D(1024, - 4, - kernel_size=3, - stride=1, - padding=1) - self.conv6_2_mbox_conf = nn.Conv2D(512, - 2, - kernel_size=3, - stride=1, - padding=1) - self.conv6_2_mbox_loc = nn.Conv2D(512, - 4, - kernel_size=3, - stride=1, - padding=1) - self.conv7_2_mbox_conf = nn.Conv2D(256, - 2, - kernel_size=3, - stride=1, - padding=1) - self.conv7_2_mbox_loc = nn.Conv2D(256, - 4, - kernel_size=3, - stride=1, - padding=1) + self.conv3_3_norm_mbox_conf = nn.Conv2D( + 256, 4, kernel_size=3, stride=1, padding=1) + self.conv3_3_norm_mbox_loc = nn.Conv2D( + 256, 4, kernel_size=3, stride=1, padding=1) + self.conv4_3_norm_mbox_conf = nn.Conv2D( + 512, 2, kernel_size=3, stride=1, padding=1) + self.conv4_3_norm_mbox_loc = nn.Conv2D( + 512, 4, kernel_size=3, stride=1, padding=1) + self.conv5_3_norm_mbox_conf = nn.Conv2D( + 512, 2, kernel_size=3, stride=1, padding=1) + self.conv5_3_norm_mbox_loc = nn.Conv2D( + 512, 4, kernel_size=3, stride=1, padding=1) + + self.fc7_mbox_conf = nn.Conv2D( + 1024, 2, kernel_size=3, stride=1, padding=1) + self.fc7_mbox_loc = nn.Conv2D( + 1024, 4, kernel_size=3, stride=1, padding=1) + self.conv6_2_mbox_conf = nn.Conv2D( + 512, 2, kernel_size=3, stride=1, padding=1) + self.conv6_2_mbox_loc = nn.Conv2D( + 512, 4, kernel_size=3, stride=1, padding=1) + self.conv7_2_mbox_conf = nn.Conv2D( + 256, 2, kernel_size=3, stride=1, padding=1) + self.conv7_2_mbox_loc = nn.Conv2D( + 256, 4, kernel_size=3, stride=1, padding=1) def forward(self, x): h = F.relu(self.conv1_1(x)) diff --git a/paddlers/models/ppgan/faceutils/face_detection/utils.py b/paddlers/models/ppgan/faceutils/face_detection/utils.py index 6590f966..b7a0b5b2 100644 --- a/paddlers/models/ppgan/faceutils/face_detection/utils.py +++ b/paddlers/models/ppgan/faceutils/face_detection/utils.py @@ -55,8 +55,8 @@ def crop(image, center, scale, resolution=256.0): br = transform([resolution, resolution], center, scale, resolution, True) br = br.numpy() if image.ndim > 2: - newDim = np.array([br[1] - ul[1], br[0] - ul[0], image.shape[2]], - dtype=np.int32) + newDim = np.array( + [br[1] - ul[1], br[0] - ul[0], image.shape[2]], dtype=np.int32) newImg = np.zeros(newDim, dtype=np.uint8) else: newDim = np.array([br[1] - ul[1], br[0] - ul[0]], dtype=np.int) @@ -69,10 +69,10 @@ def crop(image, center, scale, resolution=256.0): [max(1, -ul[1] + 1), min(br[1], ht) - ul[1]], dtype=np.int32) oldX = np.array([max(1, ul[0] + 1), min(br[0], wd)], dtype=np.int32) oldY = np.array([max(1, ul[1] + 1), min(br[1], ht)], dtype=np.int32) - newImg[newY[0] - 1:newY[1], - newX[0] - 1:newX[1]] = image[oldY[0] - 1:oldY[1], - oldX[0] - 1:oldX[1], :] - newImg = cv2.resize(newImg, - dsize=(int(resolution), int(resolution)), - interpolation=cv2.INTER_LINEAR) + newImg[newY[0] - 1:newY[1], newX[0] - 1:newX[1]] = image[oldY[0] - 1:oldY[ + 1], oldX[0] - 
1:oldX[1], :] + newImg = cv2.resize( + newImg, + dsize=(int(resolution), int(resolution)), + interpolation=cv2.INTER_LINEAR) return newImg diff --git a/paddlers/models/ppgan/faceutils/face_enhancement/face_enhance.py b/paddlers/models/ppgan/faceutils/face_enhancement/face_enhance.py index 055fc0ba..038feb9e 100644 --- a/paddlers/models/ppgan/faceutils/face_enhancement/face_enhance.py +++ b/paddlers/models/ppgan/faceutils/face_enhancement/face_enhance.py @@ -25,11 +25,7 @@ class FaceEnhancement(object): - def __init__(self, - path_to_enhance=None, - size = 512, - batch_size=1 - ): + def __init__(self, path_to_enhance=None, size=512, batch_size=1): super(FaceEnhancement, self).__init__() # Initialise the face detector @@ -38,17 +34,19 @@ def __init__(self, model_weights = paddle.load(model_weights_path) else: model_weights = paddle.load(path_to_enhance) - + self.face_enhance = GPEN(size=512, style_dim=512, n_mlp=8) self.face_enhance.load_dict(model_weights) self.face_enhance.eval() self.size = size self.mask = np.zeros((512, 512), np.float32) - cv2.rectangle(self.mask, (26, 26), (486, 486), (1, 1, 1), -1, cv2.LINE_AA) + cv2.rectangle(self.mask, (26, 26), (486, 486), (1, 1, 1), -1, + cv2.LINE_AA) self.mask = cv2.GaussianBlur(self.mask, (101, 101), 11) self.mask = cv2.GaussianBlur(self.mask, (101, 101), 11) - self.mask = paddle.tile(paddle.to_tensor(self.mask).unsqueeze(0).unsqueeze(-1), repeat_times=[batch_size,1,1,3]).numpy() - + self.mask = paddle.tile( + paddle.to_tensor(self.mask).unsqueeze(0).unsqueeze(-1), + repeat_times=[batch_size, 1, 1, 3]).numpy() def enhance_from_image(self, img): if isinstance(img, np.ndarray): @@ -65,14 +63,14 @@ def enhance_from_batch(self, img): else: assert img.shape[1:] == [3, 512, 512] img_ori = img.transpose([0, 2, 3, 1]).numpy() - img_t = (img/255. - 0.5) / 0.5 - + img_t = (img / 255. 
- 0.5) / 0.5 + with paddle.no_grad(): out, __ = self.face_enhance(img_t) - + image_tensor = out * 0.5 + 0.5 - image_tensor = image_tensor.transpose([0, 2, 3, 1]) # RGB + image_tensor = image_tensor.transpose([0, 2, 3, 1]) # RGB image_numpy = paddle.clip(image_tensor, 0, 1) * 255.0 - + out = image_numpy.astype(np.uint8).cpu().numpy() - return out * self.mask + (1-self.mask) * img_ori + return out * self.mask + (1 - self.mask) * img_ori diff --git a/paddlers/models/ppgan/faceutils/face_segmentation/face_seg.py b/paddlers/models/ppgan/faceutils/face_segmentation/face_seg.py index 113e80b2..99d3e2ee 100644 --- a/paddlers/models/ppgan/faceutils/face_segmentation/face_seg.py +++ b/paddlers/models/ppgan/faceutils/face_segmentation/face_seg.py @@ -21,13 +21,13 @@ from .fcn import FCN from .hrnet import HRNet_W18 - BISENET_WEIGHT_URL = 'https://paddlegan.bj.bcebos.com/models/faceseg_FCN-HRNetW18.pdparams' class FaceSeg: def __init__(self): - save_pth = get_path_from_url(BISENET_WEIGHT_URL, osp.split(osp.realpath(__file__))[0]) + save_pth = get_path_from_url(BISENET_WEIGHT_URL, + osp.split(osp.realpath(__file__))[0]) self.net = FCN(num_classes=2, backbone=HRNet_W18()) state_dict = paddle.load(save_pth) @@ -47,7 +47,8 @@ def __call__(self, image): return mask def input_transform(self, image): - image_input = cv2.resize(image, (384, 384), interpolation=cv2.INTER_AREA) + image_input = cv2.resize( + image, (384, 384), interpolation=cv2.INTER_AREA) image_input = (image_input / 255.)[np.newaxis, :, :, :] image_input = np.transpose(image_input, (0, 3, 1, 2)).astype(np.float32) image_input = paddle.to_tensor(image_input) diff --git a/paddlers/models/ppgan/metrics/lpips.py b/paddlers/models/ppgan/metrics/lpips.py index b9890963..65efd6c2 100644 --- a/paddlers/models/ppgan/metrics/lpips.py +++ b/paddlers/models/ppgan/metrics/lpips.py @@ -45,6 +45,7 @@ class LPIPSMetric(paddle.metric.Metric): Returns: float: lpips result. """ + def __init__(self, net='vgg', version='0.1', mean=None, std=None): self.net = net self.version = version @@ -76,10 +77,10 @@ def update(self, preds, gts): for pred, gt in zip(preds, gts): pred, gt = pred.astype(np.float32) / 255., gt.astype( np.float32) / 255. - pred = paddle.vision.transforms.normalize(pred.transpose([2, 0, 1]), - self.mean, self.std) - gt = paddle.vision.transforms.normalize(gt.transpose([2, 0, 1]), - self.mean, self.std) + pred = paddle.vision.transforms.normalize( + pred.transpose([2, 0, 1]), self.mean, self.std) + gt = paddle.vision.transforms.normalize( + gt.transpose([2, 0, 1]), self.mean, self.std) with paddle.no_grad(): value = self.loss_fn( @@ -110,11 +111,13 @@ def spatial_average(in_tens, keepdim=True): # assumes scale factor is same for H and W def upsample(in_tens, out_HW=(64, 64)): in_H, in_W = in_tens.shape[2], in_tens.shape[3] - scale_factor_H, scale_factor_W = 1. * out_HW[0] / in_H, 1. * out_HW[1] / in_W + scale_factor_H, scale_factor_W = 1. * out_HW[0] / in_H, 1. 
* out_HW[ + 1] / in_W - return nn.Upsample(scale_factor=(scale_factor_H, scale_factor_W), - mode='bilinear', - align_corners=False)(in_tens) + return nn.Upsample( + scale_factor=(scale_factor_H, scale_factor_W), + mode='bilinear', + align_corners=False)(in_tens) def normalize_tensor(in_feat, eps=1e-10): @@ -143,8 +146,8 @@ def __init__(self, if (verbose): print( 'Setting up [%s] perceptual loss: trunk [%s], v[%s], spatial [%s]' - % ('LPIPS' if lpips else 'baseline', net, version, - 'on' if spatial else 'off')) + % ('LPIPS' if lpips else 'baseline', net, version, 'on' + if spatial else 'off')) self.pnet_type = net self.pnet_tune = pnet_tune @@ -207,31 +210,35 @@ def forward(self, in0, in1, retPerLayer=False, normalize=False): feats0, feats1, diffs = {}, {}, {} for kk in range(self.L): - feats0[kk], feats1[kk] = normalize_tensor( - outs0[kk]), normalize_tensor(outs1[kk]) + feats0[kk], feats1[kk] = normalize_tensor(outs0[ + kk]), normalize_tensor(outs1[kk]) diffs[kk] = (feats0[kk] - feats1[kk])**2 if (self.lpips): if (self.spatial): res = [ - upsample(self.lins[kk].model(diffs[kk]), - out_HW=in0.shape[2:]) for kk in range(self.L) + upsample( + self.lins[kk].model(diffs[kk]), out_HW=in0.shape[2:]) + for kk in range(self.L) ] else: res = [ - spatial_average(self.lins[kk].model(diffs[kk]), - keepdim=True) for kk in range(self.L) + spatial_average( + self.lins[kk].model(diffs[kk]), keepdim=True) + for kk in range(self.L) ] else: if (self.spatial): res = [ - upsample(diffs[kk].sum(dim=1, keepdim=True), - out_HW=in0.shape[2:]) for kk in range(self.L) + upsample( + diffs[kk].sum(dim=1, keepdim=True), + out_HW=in0.shape[2:]) for kk in range(self.L) ] else: res = [ - spatial_average(diffs[kk].sum(dim=1, keepdim=True), - keepdim=True) for kk in range(self.L) + spatial_average( + diffs[kk].sum(dim=1, keepdim=True), keepdim=True) + for kk in range(self.L) ] val = res[0] @@ -251,8 +258,7 @@ def __init__(self): 'shift', paddle.to_tensor([-.030, -.088, -.188]).reshape([1, 3, 1, 1])) self.register_buffer( - 'scale', - paddle.to_tensor([.458, .448, .450]).reshape([1, 3, 1, 1])) + 'scale', paddle.to_tensor([.458, .448, .450]).reshape([1, 3, 1, 1])) def forward(self, inp): return (inp - self.shift) / self.scale @@ -260,14 +266,14 @@ def forward(self, inp): class NetLinLayer(nn.Layer): ''' A single linear layer which does a 1x1 conv ''' + def __init__(self, chn_in, chn_out=1, use_dropout=False): super(NetLinLayer, self).__init__() - layers = [ - nn.Dropout(), - ] if (use_dropout) else [] + layers = [nn.Dropout(), ] if (use_dropout) else [] layers += [ - nn.Conv2D(chn_in, chn_out, 1, stride=1, padding=0, bias_attr=False), + nn.Conv2D( + chn_in, chn_out, 1, stride=1, padding=0, bias_attr=False), ] self.model = nn.Sequential(*layers) diff --git a/paddlers/models/ppgan/metrics/psnr_ssim.py b/paddlers/models/ppgan/metrics/psnr_ssim.py index 7ed288a2..af19c292 100644 --- a/paddlers/models/ppgan/metrics/psnr_ssim.py +++ b/paddlers/models/ppgan/metrics/psnr_ssim.py @@ -170,9 +170,8 @@ def _ssim(img1, img2): sigma2_sq = cv2.filter2D(img2**2, -1, window)[5:-5, 5:-5] - mu2_sq sigma12 = cv2.filter2D(img1 * img2, -1, window)[5:-5, 5:-5] - mu1_mu2 - ssim_map = ((2 * mu1_mu2 + C1) * - (2 * sigma12 + C2)) / ((mu1_sq + mu2_sq + C1) * - (sigma1_sq + sigma2_sq + C2)) + ssim_map = ((2 * mu1_mu2 + C1) * (2 * sigma12 + C2)) / ( + (mu1_sq + mu2_sq + C1) * (sigma1_sq + sigma2_sq + C2)) return ssim_map.mean() diff --git a/paddlers/models/ppgan/models/animeganv2_model.py b/paddlers/models/ppgan/models/animeganv2_model.py index 
c2ee5de2..71c3e8f3 100644 --- a/paddlers/models/ppgan/models/animeganv2_model.py +++ b/paddlers/models/ppgan/models/animeganv2_model.py @@ -29,6 +29,7 @@ class AnimeGANV2Model(BaseModel): """ This class implements the AnimeGANV2 model. """ + def __init__(self, generator, discriminator=None, @@ -126,10 +127,11 @@ def con_sty_loss(self, real, anime, fake): @staticmethod def rgb2yuv(rgb): - kernel = paddle.to_tensor([[0.299, -0.14714119, 0.61497538], - [0.587, -0.28886916, -0.51496512], - [0.114, 0.43601035, -0.10001026]], - dtype='float32') + kernel = paddle.to_tensor( + [[0.299, -0.14714119, 0.61497538], + [0.587, -0.28886916, -0.51496512], + [0.114, 0.43601035, -0.10001026]], + dtype='float32') rgb = paddle.transpose(rgb, (0, 2, 3, 1)) yuv = paddle.matmul(rgb, kernel) return yuv diff --git a/paddlers/models/ppgan/models/base_model.py b/paddlers/models/ppgan/models/base_model.py index ae4ecd2b..a20d77af 100644 --- a/paddlers/models/ppgan/models/base_model.py +++ b/paddlers/models/ppgan/models/base_model.py @@ -49,6 +49,7 @@ class BaseModel(ABC): # save checkpoint (model.nets) \/ """ + def __init__(self, params=None): """Initialize the BaseModel class. @@ -126,8 +127,8 @@ def setup_optimizers(self, lr, cfg): parameters = [] for net_name in net_names: parameters += self.nets[net_name].parameters() - self.optimizers[opt_name] = build_optimizer( - cfg_, lr, parameters) + self.optimizers[opt_name] = build_optimizer(cfg_, lr, + parameters) return self.optimizers @@ -187,17 +188,15 @@ def export_model(self, export_model, output_dir=None, inputs_size=[]): inputs_num = 0 for net in export_model: input_spec = [ - paddle.static.InputSpec(shape=inputs_size[inputs_num + i], - dtype="float32") + paddle.static.InputSpec( + shape=inputs_size[inputs_num + i], dtype="float32") for i in range(net["inputs_num"]) ] inputs_num = inputs_num + net["inputs_num"] - static_model = paddle.jit.to_static(self.nets[net["name"]], - input_spec=input_spec) + static_model = paddle.jit.to_static( + self.nets[net["name"]], input_spec=input_spec) if output_dir is None: output_dir = 'inference_model' - paddle.jit.save( - static_model, - os.path.join( - output_dir, '{}_{}'.format(self.__class__.__name__.lower(), - net["name"]))) + paddle.jit.save(static_model, + os.path.join(output_dir, '{}_{}'.format( + self.__class__.__name__.lower(), net["name"]))) diff --git a/paddlers/models/ppgan/models/basicvsr_model.py b/paddlers/models/ppgan/models/basicvsr_model.py index 54a9b545..f9afec44 100644 --- a/paddlers/models/ppgan/models/basicvsr_model.py +++ b/paddlers/models/ppgan/models/basicvsr_model.py @@ -29,6 +29,7 @@ class BasicVSRModel(BaseSRModel): Paper: BasicVSR: The Search for Essential Components in Video Super-Resolution and Beyond, CVPR, 2021 """ + def __init__(self, generator, fix_iter, lr_mult, pixel_criterion=None): """Initialize the BasicVSR class. diff --git a/paddlers/models/ppgan/models/criterions/gan_loss.py b/paddlers/models/ppgan/models/criterions/gan_loss.py index d3fbcda4..6cc1ccbb 100644 --- a/paddlers/models/ppgan/models/criterions/gan_loss.py +++ b/paddlers/models/ppgan/models/criterions/gan_loss.py @@ -27,6 +27,7 @@ class GANLoss(nn.Layer): The GANLoss class abstracts away the need to create the target label tensor that has the same size as the input. 
""" + def __init__(self, gan_mode, target_real_label=1.0, diff --git a/paddlers/models/ppgan/models/criterions/photopen_perceptual_loss.py b/paddlers/models/ppgan/models/criterions/photopen_perceptual_loss.py index f90ed465..d604a979 100644 --- a/paddlers/models/ppgan/models/criterions/photopen_perceptual_loss.py +++ b/paddlers/models/ppgan/models/criterions/photopen_perceptual_loss.py @@ -9,6 +9,7 @@ from ppgan.utils.download import get_path_from_url from .builder import CRITERIONS + class ConvBlock(nn.Layer): def __init__(self, input_channels, output_channels, groups, name=None): super(ConvBlock, self).__init__() @@ -63,6 +64,7 @@ def forward(self, inputs): x = self._pool(x) return x + class VGG19(nn.Layer): def __init__(self, layers=19, class_dim=1000): super(VGG19, self).__init__() @@ -88,13 +90,13 @@ def __init__(self, layers=19, class_dim=1000): self._drop = Dropout(p=0.5, mode="downscale_in_infer") self._fc1 = Linear( 7 * 7 * 512, - 4096,) + 4096, ) self._fc2 = Linear( 4096, - 4096,) + 4096, ) self._out = Linear( 4096, - class_dim,) + class_dim, ) def forward(self, inputs): features = [] @@ -119,14 +121,16 @@ def forward(self, inputs): x = self._out(x) return x, features + @CRITERIONS.register() class PhotoPenPerceptualLoss(nn.Layer): - def __init__(self, - crop_size, - lambda_vgg, -# pretrained='test/vgg19pretrain.pdparams', - pretrained='https://paddlegan.bj.bcebos.com/models/vgg19pretrain.pdparams', - ): + def __init__( + self, + crop_size, + lambda_vgg, + # pretrained='test/vgg19pretrain.pdparams', + pretrained='https://paddlegan.bj.bcebos.com/models/vgg19pretrain.pdparams', + ): super(PhotoPenPerceptualLoss, self).__init__() self.model = VGG19() weight_path = get_path_from_url(pretrained) @@ -136,7 +140,7 @@ def __init__(self, self.rates = [1.0 / 32, 1.0 / 16, 1.0 / 8, 1.0 / 4, 1.0] self.crop_size = crop_size self.lambda_vgg = lambda_vgg - + def forward(self, img_r, img_f): img_r = F.interpolate(img_r, (self.crop_size, self.crop_size)) img_f = F.interpolate(img_f, (self.crop_size, self.crop_size)) @@ -146,5 +150,5 @@ def forward(self, img_r, img_f): for i in range(len(feat_r)): g_vggloss += self.rates[i] * nn.L1Loss()(feat_r[i], feat_f[i]) g_vggloss *= self.lambda_vgg - + return g_vggloss diff --git a/paddlers/models/ppgan/models/criterions/pixel_loss.py b/paddlers/models/ppgan/models/criterions/pixel_loss.py index 6e878ad7..62c7f5dc 100644 --- a/paddlers/models/ppgan/models/criterions/pixel_loss.py +++ b/paddlers/models/ppgan/models/criterions/pixel_loss.py @@ -31,6 +31,7 @@ class L1Loss(): loss_weight (float): Loss weight for L1 loss. Default: 1.0. """ + def __init__(self, reduction='mean', loss_weight=1.0): # when loss weight less than zero return None if loss_weight <= 0: @@ -59,6 +60,7 @@ class CharbonnierLoss(): eps (float): Default: 1e-12. """ + def __init__(self, eps=1e-12, reduction='sum'): self.eps = eps self.reduction = reduction @@ -90,6 +92,7 @@ class MSELoss(): loss_weight (float): Loss weight for MSE loss. Default: 1.0. """ + def __init__(self, reduction='mean', loss_weight=1.0): # when loss weight less than zero return None if loss_weight <= 0: @@ -119,6 +122,7 @@ class BCEWithLogitsLoss(): Supported choices are 'none' | 'mean' | 'sum'. Default: 'mean'. loss_weight (float): Loss weight for MSE loss. Default: 1.0. """ + def __init__(self, reduction='mean', loss_weight=1.0): # when loss weight less than zero return None if loss_weight <= 0: @@ -161,6 +165,7 @@ def calc_emd_loss(pred, target): class CalcStyleEmdLoss(): """Calc Style Emd Loss. 
""" + def __init__(self): super(CalcStyleEmdLoss, self).__init__() @@ -183,6 +188,7 @@ def __call__(self, pred, target): class CalcContentReltLoss(): """Calc Content Relt Loss. """ + def __init__(self): super(CalcContentReltLoss, self).__init__() @@ -207,6 +213,7 @@ def __call__(self, pred, target): class CalcContentLoss(): """Calc Content Loss. """ + def __init__(self): self.mse_loss = nn.MSELoss() @@ -221,14 +228,15 @@ def __call__(self, pred, target, norm=False): if (norm == False): return self.mse_loss(pred, target) else: - return self.mse_loss(mean_variance_norm(pred), - mean_variance_norm(target)) + return self.mse_loss( + mean_variance_norm(pred), mean_variance_norm(target)) @CRITERIONS.register() class CalcStyleLoss(): """Calc Style Loss. """ + def __init__(self): self.mse_loss = nn.MSELoss() @@ -241,31 +249,31 @@ def __call__(self, pred, target): """ pred_mean, pred_std = calc_mean_std(pred) target_mean, target_std = calc_mean_std(target) - return self.mse_loss(pred_mean, target_mean) + self.mse_loss( - pred_std, target_std) + return self.mse_loss(pred_mean, target_mean) + self.mse_loss(pred_std, + target_std) @CRITERIONS.register() class EdgeLoss(): def __init__(self): k = paddle.to_tensor([[.05, .25, .4, .25, .05]]) - self.kernel = paddle.matmul(k.t(),k).unsqueeze(0).tile([3,1,1,1]) + self.kernel = paddle.matmul(k.t(), k).unsqueeze(0).tile([3, 1, 1, 1]) self.loss = CharbonnierLoss() def conv_gauss(self, img): n_channels, _, kw, kh = self.kernel.shape - img = F.pad(img, [kw//2, kh//2, kw//2, kh//2], mode='replicate') + img = F.pad(img, [kw // 2, kh // 2, kw // 2, kh // 2], mode='replicate') return F.conv2d(img, self.kernel, groups=n_channels) def laplacian_kernel(self, current): - filtered = self.conv_gauss(current) # filter - down = filtered[:,:,::2,::2] # downsample - new_filter = paddle.zeros_like(filtered) - new_filter[:,:,::2,::2] = down*4 # upsample - filtered = self.conv_gauss(new_filter) # filter + filtered = self.conv_gauss(current) # filter + down = filtered[:, :, ::2, ::2] # downsample + new_filter = paddle.zeros_like(filtered) + new_filter[:, :, ::2, ::2] = down * 4 # upsample + filtered = self.conv_gauss(new_filter) # filter diff = current - filtered return diff def __call__(self, x, y): loss = self.loss(self.laplacian_kernel(x), self.laplacian_kernel(y)) - return loss \ No newline at end of file + return loss diff --git a/paddlers/models/ppgan/models/cycle_gan_model.py b/paddlers/models/ppgan/models/cycle_gan_model.py index 6d1c3f09..4946800c 100644 --- a/paddlers/models/ppgan/models/cycle_gan_model.py +++ b/paddlers/models/ppgan/models/cycle_gan_model.py @@ -31,6 +31,7 @@ class CycleGANModel(BaseModel): CycleGAN paper: https://arxiv.org/pdf/1703.10593.pdf """ + def __init__(self, generator, discriminator=None, diff --git a/paddlers/models/ppgan/models/dc_gan_model.py b/paddlers/models/ppgan/models/dc_gan_model.py index 220e05c0..787299b1 100644 --- a/paddlers/models/ppgan/models/dc_gan_model.py +++ b/paddlers/models/ppgan/models/dc_gan_model.py @@ -28,6 +28,7 @@ class DCGANModel(BaseModel): This class implements the DCGAN model, for learning a distribution from input images. DCGAN paper: https://arxiv.org/pdf/1511.06434 """ + def __init__(self, generator, discriminator=None, gan_criterion=None): """Initialize the DCGAN class. 
Args: diff --git a/paddlers/models/ppgan/models/discriminators/discriminator_animegan.py b/paddlers/models/ppgan/models/discriminators/discriminator_animegan.py index c06ad72f..09b4b17b 100644 --- a/paddlers/models/ppgan/models/discriminators/discriminator_animegan.py +++ b/paddlers/models/ppgan/models/discriminators/discriminator_animegan.py @@ -11,34 +11,34 @@ @DISCRIMINATORS.register() class AnimeDiscriminator(nn.Layer): - def __init__(self, channel: int = 64, nblocks: int = 3) -> None: + def __init__(self, channel: int=64, nblocks: int=3) -> None: super().__init__() channel = channel // 2 last_channel = channel f = [ spectral_norm( - nn.Conv2D(3, channel, 3, stride=1, padding=1, bias_attr=False)), + nn.Conv2D( + 3, channel, 3, stride=1, padding=1, bias_attr=False)), nn.LeakyReLU(0.2) ] in_h = 256 for i in range(1, nblocks): f.extend([ spectral_norm( - nn.Conv2D(last_channel, - channel * 2, - 3, - stride=2, - padding=1, - bias_attr=False)), - nn.LeakyReLU(0.2), - spectral_norm( - nn.Conv2D(channel * 2, - channel * 4, - 3, - stride=1, - padding=1, - bias_attr=False)), - nn.GroupNorm(1, channel * 4), + nn.Conv2D( + last_channel, + channel * 2, + 3, + stride=2, + padding=1, + bias_attr=False)), nn.LeakyReLU(0.2), spectral_norm( + nn.Conv2D( + channel * 2, + channel * 4, + 3, + stride=1, + padding=1, + bias_attr=False)), nn.GroupNorm(1, channel * 4), nn.LeakyReLU(0.2) ]) last_channel = channel * 4 @@ -49,15 +49,14 @@ def __init__(self, channel: int = 64, nblocks: int = 3) -> None: self.head = nn.Sequential(*[ spectral_norm( - nn.Conv2D(last_channel, - channel * 2, - 3, - stride=1, - padding=1, - bias_attr=False)), - nn.GroupNorm(1, channel * 2), - nn.LeakyReLU(0.2), - spectral_norm( + nn.Conv2D( + last_channel, + channel * 2, + 3, + stride=1, + padding=1, + bias_attr=False)), nn.GroupNorm(1, channel * 2), + nn.LeakyReLU(0.2), spectral_norm( nn.Conv2D( channel * 2, 1, 3, stride=1, padding=1, bias_attr=False)) ]) diff --git a/paddlers/models/ppgan/models/discriminators/discriminator_firstorder.py b/paddlers/models/ppgan/models/discriminators/discriminator_firstorder.py index 9a18d70e..a9369486 100644 --- a/paddlers/models/ppgan/models/discriminators/discriminator_firstorder.py +++ b/paddlers/models/ppgan/models/discriminators/discriminator_firstorder.py @@ -31,6 +31,7 @@ class FirstOrderDiscriminator(nn.Layer): loss_weights: discriminator_gan (int): weight of discriminator loss """ + def __init__(self, discriminator_cfg, common_params, train_params): super(FirstOrderDiscriminator, self).__init__() self.discriminator = MultiScaleDiscriminator(**discriminator_cfg, @@ -47,8 +48,8 @@ def forward(self, x, generated): kp_driving = generated['kp_driving'] discriminator_maps_generated = self.discriminator( pyramide_generated, kp=detach_kp(kp_driving)) - discriminator_maps_real = self.discriminator(pyramide_real, - kp=detach_kp(kp_driving)) + discriminator_maps_real = self.discriminator( + pyramide_real, kp=detach_kp(kp_driving)) loss_values = {} value_total = 0 @@ -66,6 +67,7 @@ class DownBlock2d(nn.Layer): """ Simple block for processing video (encoder). 
""" + def __init__(self, in_features, out_features, @@ -74,16 +76,15 @@ def __init__(self, pool=False, sn=False): super(DownBlock2d, self).__init__() - self.conv = nn.Conv2D(in_features, - out_features, - kernel_size=kernel_size) + self.conv = nn.Conv2D( + in_features, out_features, kernel_size=kernel_size) if sn: self.conv = spectral_norm(self.conv) else: self.sn = None if norm: - self.norm = nn.InstanceNorm2D(num_features=out_features, - epsilon=1e-05) + self.norm = nn.InstanceNorm2D( + num_features=out_features, epsilon=1e-05) else: self.norm = None @@ -117,19 +118,21 @@ def __init__(self, down_blocks = [] for i in range(num_blocks): down_blocks.append( - DownBlock2d(num_channels + num_kp * use_kp if i == 0 else min( - max_features, block_expansion * (2**i)), - min(max_features, block_expansion * (2**(i + 1))), - norm=(i != 0), - kernel_size=4, - pool=(i != num_blocks - 1), - sn=sn)) + DownBlock2d( + num_channels + num_kp * use_kp + if i == 0 else min(max_features, block_expansion * (2**i)), + min(max_features, block_expansion * (2**(i + 1))), + norm=(i != 0), + kernel_size=4, + pool=(i != num_blocks - 1), + sn=sn)) self.down_blocks = nn.LayerList(down_blocks) - self.conv = nn.Conv2D(self.down_blocks[len(self.down_blocks) - - 1].conv.parameters()[0].shape[0], - 1, - kernel_size=1) + self.conv = nn.Conv2D( + self.down_blocks[len(self.down_blocks) - 1].conv.parameters()[0] + .shape[0], + 1, + kernel_size=1) if sn: self.conv = spectral_norm(self.conv) else: @@ -156,6 +159,7 @@ class MultiScaleDiscriminator(nn.Layer): """ Multi-scale (scale) discriminator """ + def __init__(self, scales=(), **kwargs): super(MultiScaleDiscriminator, self).__init__() self.scales = scales diff --git a/paddlers/models/ppgan/models/discriminators/discriminator_lapstyle.py b/paddlers/models/ppgan/models/discriminators/discriminator_lapstyle.py index 624cfd86..a6cb449b 100644 --- a/paddlers/models/ppgan/models/discriminators/discriminator_lapstyle.py +++ b/paddlers/models/ppgan/models/discriminators/discriminator_lapstyle.py @@ -25,27 +25,25 @@ def __init__(self): num_layer = 3 num_channel = 32 self.head = nn.Sequential( - ('conv', - nn.Conv2D(3, num_channel, kernel_size=3, stride=1, padding=1)), - ('norm', nn.BatchNorm2D(num_channel)), + ('conv', nn.Conv2D( + 3, num_channel, kernel_size=3, stride=1, + padding=1)), ('norm', nn.BatchNorm2D(num_channel)), ('LeakyRelu', nn.LeakyReLU(0.2))) self.body = nn.Sequential() for i in range(num_layer - 2): self.body.add_sublayer( 'conv%d' % (i + 1), - nn.Conv2D(num_channel, - num_channel, - kernel_size=3, - stride=1, - padding=1)) + nn.Conv2D( + num_channel, + num_channel, + kernel_size=3, + stride=1, + padding=1)) self.body.add_sublayer('norm%d' % (i + 1), nn.BatchNorm2D(num_channel)) self.body.add_sublayer('LeakyRelu%d' % (i + 1), nn.LeakyReLU(0.2)) - self.tail = nn.Conv2D(num_channel, - 1, - kernel_size=3, - stride=1, - padding=1) + self.tail = nn.Conv2D( + num_channel, 1, kernel_size=3, stride=1, padding=1) def forward(self, x): x = self.head(x) diff --git a/paddlers/models/ppgan/models/discriminators/discriminator_photopen.py b/paddlers/models/ppgan/models/discriminators/discriminator_photopen.py index 0e378a41..6a01d3ba 100644 --- a/paddlers/models/ppgan/models/discriminators/discriminator_photopen.py +++ b/paddlers/models/ppgan/models/discriminators/discriminator_photopen.py @@ -25,44 +25,47 @@ from .builder import DISCRIMINATORS - class NLayersDiscriminator(nn.Layer): def __init__(self, opt): super(NLayersDiscriminator, self).__init__() - + kw = 4 padw = 
int(np.ceil((kw - 1.0) / 2)) nf = opt.ndf input_nc = self.compute_D_input_nc(opt) layer_count = 0 - layer = nn.Sequential( - nn.Conv2D(input_nc, nf, kw, 2, padw), - nn.GELU() - ) - self.add_sublayer('block_'+str(layer_count), layer) + layer = nn.Sequential(nn.Conv2D(input_nc, nf, kw, 2, padw), nn.GELU()) + self.add_sublayer('block_' + str(layer_count), layer) layer_count += 1 - feat_size_prev = np.floor((opt.crop_size + padw * 2 - (kw - 2)) / 2).astype('int64') + feat_size_prev = np.floor( + (opt.crop_size + padw * 2 - (kw - 2)) / 2).astype('int64') InstanceNorm = build_norm_layer('instance') for n in range(1, opt.n_layers_D): nf_prev = nf nf = min(nf * 2, 512) stride = 1 if n == opt.n_layers_D - 1 else 2 - feat_size = np.floor((feat_size_prev + padw * 2 - (kw - stride)) / stride).astype('int64') + feat_size = np.floor((feat_size_prev + padw * 2 - (kw - stride)) / + stride).astype('int64') feat_size_prev = feat_size layer = nn.Sequential( - spectral_norm(nn.Conv2D(nf_prev, nf, kw, stride, padw, - weight_attr=None, - bias_attr=None)), + spectral_norm( + nn.Conv2D( + nf_prev, + nf, + kw, + stride, + padw, + weight_attr=None, + bias_attr=None)), InstanceNorm(nf), - nn.GELU() - ) - self.add_sublayer('block_'+str(layer_count), layer) + nn.GELU()) + self.add_sublayer('block_' + str(layer_count), layer) layer_count += 1 layer = nn.Conv2D(nf, 1, kw, 1, padw) - self.add_sublayer('block_'+str(layer_count), layer) + self.add_sublayer('block_' + str(layer_count), layer) layer_count += 1 def forward(self, input): @@ -80,22 +83,22 @@ def compute_D_input_nc(self, opt): if not opt.no_instance: input_nc += 1 return input_nc - + + @DISCRIMINATORS.register() class MultiscaleDiscriminator(nn.Layer): - def __init__(self, - ndf, - num_D, - crop_size, - label_nc, - output_nc, - contain_dontcare_label, - no_instance, - n_layers_D, - - ): + def __init__( + self, + ndf, + num_D, + crop_size, + label_nc, + output_nc, + contain_dontcare_label, + no_instance, + n_layers_D, ): super(MultiscaleDiscriminator, self).__init__() - + opt = { 'ndf': ndf, 'num_D': num_D, @@ -105,7 +108,6 @@ def __init__(self, 'contain_dontcare_label': contain_dontcare_label, 'no_instance': no_instance, 'n_layers_D': n_layers_D, - } opt = Dict(opt) @@ -115,16 +117,16 @@ def __init__(self, feat_size = opt.crop_size for j in range(i): sequence += [nn.AvgPool2D(3, 2, 1)] - feat_size = np.floor((feat_size + 1 * 2 - (3 - 2)) / 2).astype('int64') + feat_size = np.floor( + (feat_size + 1 * 2 - (3 - 2)) / 2).astype('int64') opt.crop_size = feat_size sequence += [NLayersDiscriminator(opt)] opt.crop_size = crop_size_bkp sequence = nn.Sequential(*sequence) - self.add_sublayer('nld_'+str(i), sequence) + self.add_sublayer('nld_' + str(i), sequence) def forward(self, input): output = [] for layer in self._sub_layers.values(): output.append(layer(input)) return output - diff --git a/paddlers/models/ppgan/models/discriminators/discriminator_ugatit.py b/paddlers/models/ppgan/models/discriminators/discriminator_ugatit.py index 62e4da12..92e19827 100644 --- a/paddlers/models/ppgan/models/discriminators/discriminator_ugatit.py +++ b/paddlers/models/ppgan/models/discriminators/discriminator_ugatit.py @@ -13,63 +13,61 @@ class UGATITDiscriminator(nn.Layer): def __init__(self, input_nc, ndf=64, n_layers=5): super(UGATITDiscriminator, self).__init__() model = [ - nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect"), - spectral_norm( - nn.Conv2D(input_nc, - ndf, - kernel_size=4, - stride=2, - padding=0, - bias_attr=True)), - nn.LeakyReLU(0.2) + nn.Pad2D( + padding=[1, 1, 1, 
1], mode="reflect"), spectral_norm( + nn.Conv2D( + input_nc, + ndf, + kernel_size=4, + stride=2, + padding=0, + bias_attr=True)), nn.LeakyReLU(0.2) ] for i in range(1, n_layers - 2): mult = 2**(i - 1) model += [ - nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect"), - spectral_norm( - nn.Conv2D(ndf * mult, - ndf * mult * 2, - kernel_size=4, - stride=2, - padding=0, - bias_attr=True)), - nn.LeakyReLU(0.2) + nn.Pad2D( + padding=[1, 1, 1, 1], mode="reflect"), spectral_norm( + nn.Conv2D( + ndf * mult, + ndf * mult * 2, + kernel_size=4, + stride=2, + padding=0, + bias_attr=True)), nn.LeakyReLU(0.2) ] mult = 2**(n_layers - 2 - 1) model += [ - nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect"), - spectral_norm( - nn.Conv2D(ndf * mult, - ndf * mult * 2, - kernel_size=4, - stride=1, - padding=0, - bias_attr=True)), - nn.LeakyReLU(0.2) + nn.Pad2D( + padding=[1, 1, 1, 1], mode="reflect"), spectral_norm( + nn.Conv2D( + ndf * mult, + ndf * mult * 2, + kernel_size=4, + stride=1, + padding=0, + bias_attr=True)), nn.LeakyReLU(0.2) ] # Class Activation Map mult = 2**(n_layers - 2) self.gap_fc = spectral_norm(nn.Linear(ndf * mult, 1, bias_attr=False)) self.gmp_fc = spectral_norm(nn.Linear(ndf * mult, 1, bias_attr=False)) - self.conv1x1 = nn.Conv2D(ndf * mult * 2, - ndf * mult, - kernel_size=1, - stride=1, - bias_attr=True) + self.conv1x1 = nn.Conv2D( + ndf * mult * 2, ndf * mult, kernel_size=1, stride=1, bias_attr=True) self.leaky_relu = nn.LeakyReLU(0.2) self.pad = nn.Pad2D(padding=[1, 1, 1, 1], mode="reflect") self.conv = spectral_norm( - nn.Conv2D(ndf * mult, - 1, - kernel_size=4, - stride=1, - padding=0, - bias_attr=False)) + nn.Conv2D( + ndf * mult, + 1, + kernel_size=4, + stride=1, + padding=0, + bias_attr=False)) self.model = nn.Sequential(*model) diff --git a/paddlers/models/ppgan/models/discriminators/syncnet.py b/paddlers/models/ppgan/models/discriminators/syncnet.py index 9fc3d26a..14295777 100644 --- a/paddlers/models/ppgan/models/discriminators/syncnet.py +++ b/paddlers/models/ppgan/models/discriminators/syncnet.py @@ -17,126 +17,70 @@ def __init__(self): super(SyncNetColor, self).__init__() self.face_encoder = nn.Sequential( - ConvBNRelu(15, 32, kernel_size=(7, 7), stride=1, padding=3), - ConvBNRelu(32, 64, kernel_size=5, stride=(1, 2), padding=1), - ConvBNRelu(64, - 64, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(64, - 64, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(64, 128, kernel_size=3, stride=2, padding=1), - ConvBNRelu(128, - 128, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(128, - 128, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(128, - 128, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(128, 256, kernel_size=3, stride=2, padding=1), - ConvBNRelu(256, - 256, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(256, - 256, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(256, 512, kernel_size=3, stride=2, padding=1), - ConvBNRelu(512, - 512, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(512, - 512, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(512, 512, kernel_size=3, stride=2, padding=1), - ConvBNRelu(512, 512, kernel_size=3, stride=1, padding=0), - ConvBNRelu(512, 512, kernel_size=1, stride=1, padding=0), - ) + ConvBNRelu( + 15, 32, kernel_size=(7, 7), stride=1, padding=3), + ConvBNRelu( + 32, 64, kernel_size=5, stride=(1, 2), padding=1), + ConvBNRelu( + 64, 
64, kernel_size=3, stride=1, padding=1, residual=True), + ConvBNRelu( + 64, 64, kernel_size=3, stride=1, padding=1, residual=True), + ConvBNRelu( + 64, 128, kernel_size=3, stride=2, padding=1), + ConvBNRelu( + 128, 128, kernel_size=3, stride=1, padding=1, residual=True), + ConvBNRelu( + 128, 128, kernel_size=3, stride=1, padding=1, residual=True), + ConvBNRelu( + 128, 128, kernel_size=3, stride=1, padding=1, residual=True), + ConvBNRelu( + 128, 256, kernel_size=3, stride=2, padding=1), + ConvBNRelu( + 256, 256, kernel_size=3, stride=1, padding=1, residual=True), + ConvBNRelu( + 256, 256, kernel_size=3, stride=1, padding=1, residual=True), + ConvBNRelu( + 256, 512, kernel_size=3, stride=2, padding=1), + ConvBNRelu( + 512, 512, kernel_size=3, stride=1, padding=1, residual=True), + ConvBNRelu( + 512, 512, kernel_size=3, stride=1, padding=1, residual=True), + ConvBNRelu( + 512, 512, kernel_size=3, stride=2, padding=1), + ConvBNRelu( + 512, 512, kernel_size=3, stride=1, padding=0), + ConvBNRelu( + 512, 512, kernel_size=1, stride=1, padding=0), ) self.audio_encoder = nn.Sequential( - ConvBNRelu(1, 32, kernel_size=3, stride=1, padding=1), - ConvBNRelu(32, - 32, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(32, - 32, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(32, 64, kernel_size=3, stride=(3, 1), padding=1), - ConvBNRelu(64, - 64, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(64, - 64, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(64, 128, kernel_size=3, stride=3, padding=1), - ConvBNRelu(128, - 128, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(128, - 128, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(128, 256, kernel_size=3, stride=(3, 2), padding=1), - ConvBNRelu(256, - 256, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(256, - 256, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(256, 512, kernel_size=3, stride=1, padding=0), - ConvBNRelu(512, 512, kernel_size=1, stride=1, padding=0), - ) + ConvBNRelu( + 1, 32, kernel_size=3, stride=1, padding=1), + ConvBNRelu( + 32, 32, kernel_size=3, stride=1, padding=1, residual=True), + ConvBNRelu( + 32, 32, kernel_size=3, stride=1, padding=1, residual=True), + ConvBNRelu( + 32, 64, kernel_size=3, stride=(3, 1), padding=1), + ConvBNRelu( + 64, 64, kernel_size=3, stride=1, padding=1, residual=True), + ConvBNRelu( + 64, 64, kernel_size=3, stride=1, padding=1, residual=True), + ConvBNRelu( + 64, 128, kernel_size=3, stride=3, padding=1), + ConvBNRelu( + 128, 128, kernel_size=3, stride=1, padding=1, residual=True), + ConvBNRelu( + 128, 128, kernel_size=3, stride=1, padding=1, residual=True), + ConvBNRelu( + 128, 256, kernel_size=3, stride=(3, 2), padding=1), + ConvBNRelu( + 256, 256, kernel_size=3, stride=1, padding=1, residual=True), + ConvBNRelu( + 256, 256, kernel_size=3, stride=1, padding=1, residual=True), + ConvBNRelu( + 256, 512, kernel_size=3, stride=1, padding=0), + ConvBNRelu( + 512, 512, kernel_size=1, stride=1, padding=0), ) def forward(self, audio_sequences, face_sequences): # audio_sequences := (B, dim, T) diff --git a/paddlers/models/ppgan/models/discriminators/vgg_discriminator.py b/paddlers/models/ppgan/models/discriminators/vgg_discriminator.py index 74b6112a..1bcfa2dc 100644 --- a/paddlers/models/ppgan/models/discriminators/vgg_discriminator.py +++ b/paddlers/models/ppgan/models/discriminators/vgg_discriminator.py @@ -16,6 +16,7 @@ 
class VGGDiscriminator128(nn.Layer): num_feat (int): Channel number of base intermediate features. Default: 64. """ + def __init__(self, in_channels, num_feat, norm_layer='batch'): super(VGGDiscriminator128, self).__init__() @@ -23,64 +24,32 @@ def __init__(self, in_channels, num_feat, norm_layer='batch'): self.conv0_1 = nn.Conv2D(num_feat, num_feat, 4, 2, 1, bias_attr=False) self.bn0_1 = nn.BatchNorm2D(num_feat) - self.conv1_0 = nn.Conv2D(num_feat, - num_feat * 2, - 3, - 1, - 1, - bias_attr=False) + self.conv1_0 = nn.Conv2D( + num_feat, num_feat * 2, 3, 1, 1, bias_attr=False) self.bn1_0 = nn.BatchNorm2D(num_feat * 2) - self.conv1_1 = nn.Conv2D(num_feat * 2, - num_feat * 2, - 4, - 2, - 1, - bias_attr=False) + self.conv1_1 = nn.Conv2D( + num_feat * 2, num_feat * 2, 4, 2, 1, bias_attr=False) self.bn1_1 = nn.BatchNorm2D(num_feat * 2) - self.conv2_0 = nn.Conv2D(num_feat * 2, - num_feat * 4, - 3, - 1, - 1, - bias_attr=False) + self.conv2_0 = nn.Conv2D( + num_feat * 2, num_feat * 4, 3, 1, 1, bias_attr=False) self.bn2_0 = nn.BatchNorm2D(num_feat * 4) - self.conv2_1 = nn.Conv2D(num_feat * 4, - num_feat * 4, - 4, - 2, - 1, - bias_attr=False) + self.conv2_1 = nn.Conv2D( + num_feat * 4, num_feat * 4, 4, 2, 1, bias_attr=False) self.bn2_1 = nn.BatchNorm2D(num_feat * 4) - self.conv3_0 = nn.Conv2D(num_feat * 4, - num_feat * 8, - 3, - 1, - 1, - bias_attr=False) + self.conv3_0 = nn.Conv2D( + num_feat * 4, num_feat * 8, 3, 1, 1, bias_attr=False) self.bn3_0 = nn.BatchNorm2D(num_feat * 8) - self.conv3_1 = nn.Conv2D(num_feat * 8, - num_feat * 8, - 4, - 2, - 1, - bias_attr=False) + self.conv3_1 = nn.Conv2D( + num_feat * 8, num_feat * 8, 4, 2, 1, bias_attr=False) self.bn3_1 = nn.BatchNorm2D(num_feat * 8) - self.conv4_0 = nn.Conv2D(num_feat * 8, - num_feat * 8, - 3, - 1, - 1, - bias_attr=False) + self.conv4_0 = nn.Conv2D( + num_feat * 8, num_feat * 8, 3, 1, 1, bias_attr=False) self.bn4_0 = nn.BatchNorm2D(num_feat * 8) - self.conv4_1 = nn.Conv2D(num_feat * 8, - num_feat * 8, - 4, - 2, - 1, - bias_attr=False) + self.conv4_1 = nn.Conv2D( + num_feat * 8, num_feat * 8, 4, 2, 1, bias_attr=False) self.bn4_1 = nn.BatchNorm2D(num_feat * 8) self.linear1 = nn.Linear(num_feat * 8 * 4 * 4, 100) @@ -95,24 +64,24 @@ def forward(self, x): f'but received {x.shape}.') feat = self.lrelu(self.conv0_0(x)) - feat = self.lrelu(self.bn0_1( - self.conv0_1(feat))) # output spatial size: (64, 64) + feat = self.lrelu( + self.bn0_1(self.conv0_1(feat))) # output spatial size: (64, 64) feat = self.lrelu(self.bn1_0(self.conv1_0(feat))) - feat = self.lrelu(self.bn1_1( - self.conv1_1(feat))) # output spatial size: (32, 32) + feat = self.lrelu( + self.bn1_1(self.conv1_1(feat))) # output spatial size: (32, 32) feat = self.lrelu(self.bn2_0(self.conv2_0(feat))) - feat = self.lrelu(self.bn2_1( - self.conv2_1(feat))) # output spatial size: (16, 16) + feat = self.lrelu( + self.bn2_1(self.conv2_1(feat))) # output spatial size: (16, 16) feat = self.lrelu(self.bn3_0(self.conv3_0(feat))) - feat = self.lrelu(self.bn3_1( - self.conv3_1(feat))) # output spatial size: (8, 8) + feat = self.lrelu( + self.bn3_1(self.conv3_1(feat))) # output spatial size: (8, 8) feat = self.lrelu(self.bn4_0(self.conv4_0(feat))) - feat = self.lrelu(self.bn4_1( - self.conv4_1(feat))) # output spatial size: (4, 4) + feat = self.lrelu( + self.bn4_1(self.conv4_1(feat))) # output spatial size: (4, 4) feat = feat.reshape([feat.shape[0], -1]) feat = self.lrelu(self.linear1(feat)) diff --git a/paddlers/models/ppgan/models/discriminators/wav2lip_disc_qual.py 
b/paddlers/models/ppgan/models/discriminators/wav2lip_disc_qual.py index ea1f9c8f..3b22ee34 100644 --- a/paddlers/models/ppgan/models/discriminators/wav2lip_disc_qual.py +++ b/paddlers/models/ppgan/models/discriminators/wav2lip_disc_qual.py @@ -17,37 +17,43 @@ def __init__(self): self.face_encoder_blocks = nn.LayerList([ nn.Sequential( - NonNormConv2d(3, 32, kernel_size=7, stride=1, - padding=3)), # 48,96 + NonNormConv2d( + 3, 32, kernel_size=7, stride=1, padding=3)), # 48,96 nn.Sequential( - NonNormConv2d(32, 64, kernel_size=5, stride=(1, 2), - padding=2), # 48,48 - NonNormConv2d(64, 64, kernel_size=5, stride=1, padding=2)), + NonNormConv2d( + 32, 64, kernel_size=5, stride=(1, 2), padding=2), # 48,48 + NonNormConv2d( + 64, 64, kernel_size=5, stride=1, padding=2)), nn.Sequential( - NonNormConv2d(64, 128, kernel_size=5, stride=2, - padding=2), # 24,24 - NonNormConv2d(128, 128, kernel_size=5, stride=1, padding=2)), + NonNormConv2d( + 64, 128, kernel_size=5, stride=2, padding=2), # 24,24 + NonNormConv2d( + 128, 128, kernel_size=5, stride=1, padding=2)), nn.Sequential( - NonNormConv2d(128, 256, kernel_size=5, stride=2, - padding=2), # 12,12 - NonNormConv2d(256, 256, kernel_size=5, stride=1, padding=2)), + NonNormConv2d( + 128, 256, kernel_size=5, stride=2, padding=2), # 12,12 + NonNormConv2d( + 256, 256, kernel_size=5, stride=1, padding=2)), nn.Sequential( - NonNormConv2d(256, 512, kernel_size=3, stride=2, - padding=1), # 6,6 - NonNormConv2d(512, 512, kernel_size=3, stride=1, padding=1)), + NonNormConv2d( + 256, 512, kernel_size=3, stride=2, padding=1), # 6,6 + NonNormConv2d( + 512, 512, kernel_size=3, stride=1, padding=1)), nn.Sequential( - NonNormConv2d(512, 512, kernel_size=3, stride=2, - padding=1), # 3,3 - NonNormConv2d(512, 512, kernel_size=3, stride=1, padding=1), - ), + NonNormConv2d( + 512, 512, kernel_size=3, stride=2, padding=1), # 3,3 + NonNormConv2d( + 512, 512, kernel_size=3, stride=1, padding=1), ), nn.Sequential( - NonNormConv2d(512, 512, kernel_size=3, stride=1, - padding=0), # 1, 1 - NonNormConv2d(512, 512, kernel_size=1, stride=1, padding=0)), + NonNormConv2d( + 512, 512, kernel_size=3, stride=1, padding=0), # 1, 1 + NonNormConv2d( + 512, 512, kernel_size=1, stride=1, padding=0)), ]) self.binary_pred = nn.Sequential( - nn.Conv2D(512, 1, kernel_size=1, stride=1, padding=0), nn.Sigmoid()) + nn.Conv2D( + 512, 1, kernel_size=1, stride=1, padding=0), nn.Sigmoid()) self.label_noise = .0 def get_lower_half(self, face_sequences): @@ -71,8 +77,9 @@ def perceptual_forward(self, false_face_sequences): binary_pred = self.binary_pred(false_feats).reshape( (len(false_feats), -1)) - false_pred_loss = F.binary_cross_entropy( - binary_pred, paddle.ones((len(false_feats), 1))) + false_pred_loss = F.binary_cross_entropy(binary_pred, + paddle.ones( + (len(false_feats), 1))) return false_pred_loss diff --git a/paddlers/models/ppgan/models/drn_model.py b/paddlers/models/ppgan/models/drn_model.py index ce44e988..8e8e91a6 100644 --- a/paddlers/models/ppgan/models/drn_model.py +++ b/paddlers/models/ppgan/models/drn_model.py @@ -32,6 +32,7 @@ class DRN(BaseSRModel): DRN paper: https://arxiv.org/pdf/1809.00219.pdf """ + def __init__(self, generator, lq_loss_weight=0.1, @@ -82,8 +83,8 @@ def setup_input(self, input): self.lq = paddle.to_tensor(input['lq']) self.visual_items['lq'] = self.lq - if isinstance(self.scale, (list, tuple)) and len( - self.scale) == 2 and 'lqx2' in input: + if isinstance(self.scale, ( + list, tuple)) and len(self.scale) == 2 and 'lqx2' in input: self.lqx2 = input['lqx2'] if 
'gt' in input: diff --git a/paddlers/models/ppgan/models/edvr_model.py b/paddlers/models/ppgan/models/edvr_model.py index 3fa270d5..4b47aeb4 100644 --- a/paddlers/models/ppgan/models/edvr_model.py +++ b/paddlers/models/ppgan/models/edvr_model.py @@ -27,6 +27,7 @@ class EDVRModel(BaseSRModel): Paper: EDVR: Video Restoration with Enhanced Deformable Convolutional Networks. """ + def __init__(self, generator, tsa_iter, pixel_criterion=None): """Initialize the EDVR class. @@ -77,8 +78,8 @@ def train_iter(self, optims=None): def init_edvr_weight(net): def reset_func(m): - if hasattr(m, 'weight') and (not isinstance( - m, (nn.BatchNorm, nn.BatchNorm2D))) and ( + if hasattr(m, 'weight') and ( + not isinstance(m, (nn.BatchNorm, nn.BatchNorm2D))) and ( not isinstance(m, ResidualBlockNoBN) and (not isinstance(m, DCNPack))): reset_parameters(m) diff --git a/paddlers/models/ppgan/models/esrgan_model.py b/paddlers/models/ppgan/models/esrgan_model.py index fe67cff0..ede63782 100644 --- a/paddlers/models/ppgan/models/esrgan_model.py +++ b/paddlers/models/ppgan/models/esrgan_model.py @@ -29,6 +29,7 @@ class ESRGAN(BaseSRModel): ESRGAN paper: https://arxiv.org/pdf/1809.00219.pdf """ + def __init__(self, generator, discriminator=None, @@ -71,8 +72,8 @@ def train_iter(self, optimizers=None): l_total += l_pix self.losses['loss_pix'] = l_pix if self.perceptual_criterion: - l_g_percep, l_g_style = self.perceptual_criterion( - self.output, self.gt) + l_g_percep, l_g_style = self.perceptual_criterion(self.output, + self.gt) # l_total += l_pix if l_g_percep is not None: l_total += l_g_percep @@ -86,14 +87,10 @@ def train_iter(self, optimizers=None): self.set_requires_grad(self.nets['discriminator'], False) real_d_pred = self.nets['discriminator'](self.gt).detach() fake_g_pred = self.nets['discriminator'](self.output) - l_g_real = self.gan_criterion(real_d_pred - - paddle.mean(fake_g_pred), - False, - is_disc=False) - l_g_fake = self.gan_criterion(fake_g_pred - - paddle.mean(real_d_pred), - True, - is_disc=False) + l_g_real = self.gan_criterion( + real_d_pred - paddle.mean(fake_g_pred), False, is_disc=False) + l_g_fake = self.gan_criterion( + fake_g_pred - paddle.mean(real_d_pred), True, is_disc=False) l_g_gan = (l_g_real + l_g_fake) / 2 l_total += l_g_gan diff --git a/paddlers/models/ppgan/models/gan_model.py b/paddlers/models/ppgan/models/gan_model.py index cfe157d2..d0583532 100644 --- a/paddlers/models/ppgan/models/gan_model.py +++ b/paddlers/models/ppgan/models/gan_model.py @@ -32,6 +32,7 @@ class GANModel(BaseModel): vanilla GAN paper: https://arxiv.org/abs/1406.2661 """ + def __init__(self, generator, discriminator=None, @@ -50,12 +51,12 @@ def __init__(self, self.disc_iters = 1 if self.params is None else self.params.get( 'disc_iters', 1) - self.disc_start_iters = (0 if self.params is None else self.params.get( - 'disc_start_iters', 0)) - self.samples_every_row = (8 if self.params is None else self.params.get( - 'samples_every_row', 8)) - self.visual_interval = (500 if self.params is None else self.params.get( - 'visual_interval', 500)) + self.disc_start_iters = (0 if self.params is None else + self.params.get('disc_start_iters', 0)) + self.samples_every_row = (8 if self.params is None else + self.params.get('samples_every_row', 8)) + self.visual_interval = (500 if self.params is None else + self.params.get('visual_interval', 500)) # define generator self.nets['netG'] = build_generator(generator) @@ -89,7 +90,8 @@ def setup_input(self, input): self.n_class = self.nets['netG'].n_class self.D_real_inputs += [ 
- paddle.to_tensor(input['class_id'], dtype='int64') + paddle.to_tensor( + input['class_id'], dtype='int64') ] else: self.n_class = 0 @@ -110,9 +112,7 @@ def setup_input(self, input): rows_num = (batch_size - 1) // self.samples_every_row + 1 class_ids = paddle.randint(0, self.n_class, [rows_num, 1]) class_ids = class_ids.tile([1, self.samples_every_row]) - class_ids = class_ids.reshape([ - -1, - ])[:batch_size].detach() + class_ids = class_ids.reshape([-1, ])[:batch_size].detach() self.G_fixed_inputs[1] = class_ids.detach() def forward(self): @@ -143,8 +143,8 @@ def backward_D(self): # combine loss and calculate gradients if self.criterionGAN.gan_mode in ['vanilla', 'lsgan']: - self.loss_D = self.loss_D + (self.loss_D_fake + - self.loss_D_real) * 0.5 + self.loss_D = self.loss_D + (self.loss_D_fake + self.loss_D_real + ) * 0.5 else: self.loss_D = self.loss_D + self.loss_D_fake + self.loss_D_real diff --git a/paddlers/models/ppgan/models/generators/basicvsr_plus_plus.py b/paddlers/models/ppgan/models/generators/basicvsr_plus_plus.py index b0783258..cf500390 100644 --- a/paddlers/models/ppgan/models/generators/basicvsr_plus_plus.py +++ b/paddlers/models/ppgan/models/generators/basicvsr_plus_plus.py @@ -45,6 +45,7 @@ class BasicVSRPlusPlus(nn.Layer): or not. If False, the output resolution is equal to the input resolution. Default: True. """ + def __init__(self, mid_channels=64, num_blocks=7, is_low_res_input=True): super().__init__() @@ -88,21 +89,16 @@ def __init__(self, mid_channels=64, num_blocks=7, is_low_res_input=True): 5 * mid_channels, mid_channels, num_blocks) # upsampling module - self.reconstruction = ResidualBlocksWithInputConv( - 5 * mid_channels, mid_channels, 5) - self.upsample1 = PixelShufflePack(mid_channels, - mid_channels, - 2, - upsample_kernel=3) - self.upsample2 = PixelShufflePack(mid_channels, - 64, - 2, - upsample_kernel=3) + self.reconstruction = ResidualBlocksWithInputConv(5 * mid_channels, + mid_channels, 5) + self.upsample1 = PixelShufflePack( + mid_channels, mid_channels, 2, upsample_kernel=3) + self.upsample2 = PixelShufflePack( + mid_channels, 64, 2, upsample_kernel=3) self.conv_hr = nn.Conv2D(64, 64, 3, 1, 1) self.conv_last = nn.Conv2D(64, 3, 3, 1, 1) - self.img_upsample = nn.Upsample(scale_factor=4, - mode='bilinear', - align_corners=False) + self.img_upsample = nn.Upsample( + scale_factor=4, mode='bilinear', align_corners=False) # activation function self.lrelu = nn.LeakyReLU(negative_slope=0.1) @@ -198,10 +194,9 @@ def forward(self, lqs): if self.is_low_res_input: lqs_downsample = lqs else: - lqs_downsample = F.interpolate(lqs.reshape([-1, c, h, w]), - scale_factor=0.25, - mode='bicubic').reshape( - [n, t, c, h // 4, w // 4]) + lqs_downsample = F.interpolate( + lqs.reshape([-1, c, h, w]), scale_factor=0.25, + mode='bicubic').reshape([n, t, c, h // 4, w // 4]) # check whether the input is an extended sequence self.check_if_mirror_extended(lqs) @@ -213,9 +208,10 @@ def forward(self, lqs): feats['spatial'] = [feats_[:, i, :, :, :] for i in range(0, t)] # compute optical flow using the low-res inputs - assert lqs_downsample.shape[3] >= 64 and lqs_downsample.shape[4] >= 64, ( - 'The height and width of low-res inputs must be at least 64, ' - f'but got {h} and {w}.') + assert lqs_downsample.shape[3] >= 64 and lqs_downsample.shape[ + 4] >= 64, ( + 'The height and width of low-res inputs must be at least 64, ' + f'but got {h} and {w}.') flows_forward, flows_backward = self.compute_flow(lqs_downsample) # feature propgation @@ -253,20 +249,20 @@ def forward(self, 
lqs): flow_n2 = flow_n1 + flow_warp( flow_n2, flow_n1.transpose([0, 2, 3, 1])) - cond_n2 = flow_warp(feat_n2, flow_n2.transpose([0, 2, 3, - 1])) + cond_n2 = flow_warp(feat_n2, + flow_n2.transpose([0, 2, 3, 1])) # flow-guided deformable convolution cond = paddle.concat([cond_n1, feat_current, cond_n2], axis=1) feat_prop = paddle.concat([feat_prop, feat_n2], axis=1) - feat_prop = self.deform_align_backward_1( - feat_prop, cond, flow_n1, flow_n2) + feat_prop = self.deform_align_backward_1(feat_prop, cond, + flow_n1, flow_n2) # concatenate and residual blocks feat = [feat_current] + [ - feats[k][idx] - for k in feats if k not in ['spatial', 'backward_1'] + feats[k][idx] for k in feats + if k not in ['spatial', 'backward_1'] ] + [feat_prop] feat = paddle.concat(feat, axis=1) @@ -308,8 +304,8 @@ def forward(self, lqs): flow_n2 = flow_n1 + flow_warp( flow_n2, flow_n1.transpose([0, 2, 3, 1])) - cond_n2 = flow_warp(feat_n2, flow_n2.transpose([0, 2, 3, - 1])) + cond_n2 = flow_warp(feat_n2, + flow_n2.transpose([0, 2, 3, 1])) # flow-guided deformable convolution cond = paddle.concat([cond_n1, feat_current, cond_n2], axis=1) @@ -320,8 +316,8 @@ def forward(self, lqs): # concatenate and residual blocks feat = [feat_current] + [ - feats[k][idx] - for k in feats if k not in ['spatial', 'forward_1'] + feats[k][idx] for k in feats + if k not in ['spatial', 'forward_1'] ] + [feat_prop] feat = paddle.concat(feat, axis=1) @@ -361,20 +357,20 @@ def forward(self, lqs): flow_n2 = flow_n1 + flow_warp( flow_n2, flow_n1.transpose([0, 2, 3, 1])) - cond_n2 = flow_warp(feat_n2, flow_n2.transpose([0, 2, 3, - 1])) + cond_n2 = flow_warp(feat_n2, + flow_n2.transpose([0, 2, 3, 1])) # flow-guided deformable convolution cond = paddle.concat([cond_n1, feat_current, cond_n2], axis=1) feat_prop = paddle.concat([feat_prop, feat_n2], axis=1) - feat_prop = self.deform_align_backward_2( - feat_prop, cond, flow_n1, flow_n2) + feat_prop = self.deform_align_backward_2(feat_prop, cond, + flow_n1, flow_n2) # concatenate and residual blocks feat = [feat_current] + [ - feats[k][idx] - for k in feats if k not in ['spatial', 'backward_2'] + feats[k][idx] for k in feats + if k not in ['spatial', 'backward_2'] ] + [feat_prop] feat = paddle.concat(feat, axis=1) @@ -416,8 +412,8 @@ def forward(self, lqs): flow_n2 = flow_n1 + flow_warp( flow_n2, flow_n1.transpose([0, 2, 3, 1])) - cond_n2 = flow_warp(feat_n2, flow_n2.transpose([0, 2, 3, - 1])) + cond_n2 = flow_warp(feat_n2, + flow_n2.transpose([0, 2, 3, 1])) # flow-guided deformable convolution cond = paddle.concat([cond_n1, feat_current, cond_n2], axis=1) @@ -428,8 +424,8 @@ def forward(self, lqs): # concatenate and residual blocks feat = [feat_current] + [ - feats[k][idx] - for k in feats if k not in ['spatial', 'forward_2'] + feats[k][idx] for k in feats + if k not in ['spatial', 'forward_2'] ] + [feat_prop] feat = paddle.concat(feat, axis=1) diff --git a/paddlers/models/ppgan/models/generators/edvr.py b/paddlers/models/ppgan/models/generators/edvr.py index 1a578394..3603c945 100644 --- a/paddlers/models/ppgan/models/generators/edvr.py +++ b/paddlers/models/ppgan/models/generators/edvr.py @@ -62,6 +62,7 @@ class ResidualBlockNoBN(nn.Layer): nf (int): Channel number of intermediate features. Default: 64. """ + def __init__(self, nf=64): super(ResidualBlockNoBN, self).__init__() self.nf = nf @@ -100,6 +101,7 @@ class PredeblurResNetPyramid(nn.Layer): nf (int): Channel number of intermediate features. Default: 64. HR_in (bool): Whether the input has high resolution. Default: False. 
""" + def __init__(self, in_nf=3, nf=64, HR_in=False): super(PredeblurResNetPyramid, self).__init__() self.in_nf = in_nf @@ -107,27 +109,31 @@ def __init__(self, in_nf=3, nf=64, HR_in=False): self.HR_in = True if HR_in else False self.Leaky_relu = nn.LeakyReLU(negative_slope=0.1) if self.HR_in: - self.conv_first_1 = nn.Conv2D(in_channels=self.in_nf, - out_channels=self.nf, - kernel_size=3, - stride=1, - padding=1) - self.conv_first_2 = nn.Conv2D(in_channels=self.nf, - out_channels=self.nf, - kernel_size=3, - stride=2, - padding=1) - self.conv_first_3 = nn.Conv2D(in_channels=self.nf, - out_channels=self.nf, - kernel_size=3, - stride=2, - padding=1) + self.conv_first_1 = nn.Conv2D( + in_channels=self.in_nf, + out_channels=self.nf, + kernel_size=3, + stride=1, + padding=1) + self.conv_first_2 = nn.Conv2D( + in_channels=self.nf, + out_channels=self.nf, + kernel_size=3, + stride=2, + padding=1) + self.conv_first_3 = nn.Conv2D( + in_channels=self.nf, + out_channels=self.nf, + kernel_size=3, + stride=2, + padding=1) else: - self.conv_first = nn.Conv2D(in_channels=self.in_nf, - out_channels=self.nf, - kernel_size=3, - stride=1, - padding=1) + self.conv_first = nn.Conv2D( + in_channels=self.in_nf, + out_channels=self.nf, + kernel_size=3, + stride=1, + padding=1) self.RB_L1_1 = ResidualBlockNoBN(nf=self.nf) self.RB_L1_2 = ResidualBlockNoBN(nf=self.nf) self.RB_L1_3 = ResidualBlockNoBN(nf=self.nf) @@ -136,20 +142,20 @@ def __init__(self, in_nf=3, nf=64, HR_in=False): self.RB_L2_1 = ResidualBlockNoBN(nf=self.nf) self.RB_L2_2 = ResidualBlockNoBN(nf=self.nf) self.RB_L3_1 = ResidualBlockNoBN(nf=self.nf) - self.deblur_L2_conv = nn.Conv2D(in_channels=self.nf, - out_channels=self.nf, - kernel_size=3, - stride=2, - padding=1) - self.deblur_L3_conv = nn.Conv2D(in_channels=self.nf, - out_channels=self.nf, - kernel_size=3, - stride=2, - padding=1) - self.upsample = nn.Upsample(scale_factor=2, - mode="bilinear", - align_corners=False, - align_mode=0) + self.deblur_L2_conv = nn.Conv2D( + in_channels=self.nf, + out_channels=self.nf, + kernel_size=3, + stride=2, + padding=1) + self.deblur_L3_conv = nn.Conv2D( + in_channels=self.nf, + out_channels=self.nf, + kernel_size=3, + stride=2, + padding=1) + self.upsample = nn.Upsample( + scale_factor=2, mode="bilinear", align_corners=False, align_mode=0) def forward(self, x): if self.HR_in: @@ -189,6 +195,7 @@ class TSAFusion(nn.Layer): nframes (int): Number of frames. Default: 5. center (int): The index of center frame. Default: 2. 
""" + def __init__(self, nf=64, nframes=5, center=2): super(TSAFusion, self).__init__() self.nf = nf @@ -196,81 +203,88 @@ def __init__(self, nf=64, nframes=5, center=2): self.center = center self.sigmoid = nn.Sigmoid() self.Leaky_relu = nn.LeakyReLU(negative_slope=0.1) - self.tAtt_2 = nn.Conv2D(in_channels=self.nf, - out_channels=self.nf, - kernel_size=3, - stride=1, - padding=1) - self.tAtt_1 = nn.Conv2D(in_channels=self.nf, - out_channels=self.nf, - kernel_size=3, - stride=1, - padding=1) - self.fea_fusion = nn.Conv2D(in_channels=self.nf * self.nframes, - out_channels=self.nf, - kernel_size=1, - stride=1, - padding=0) - self.sAtt_1 = nn.Conv2D(in_channels=self.nf * self.nframes, - out_channels=self.nf, - kernel_size=1, - stride=1, - padding=0) + self.tAtt_2 = nn.Conv2D( + in_channels=self.nf, + out_channels=self.nf, + kernel_size=3, + stride=1, + padding=1) + self.tAtt_1 = nn.Conv2D( + in_channels=self.nf, + out_channels=self.nf, + kernel_size=3, + stride=1, + padding=1) + self.fea_fusion = nn.Conv2D( + in_channels=self.nf * self.nframes, + out_channels=self.nf, + kernel_size=1, + stride=1, + padding=0) + self.sAtt_1 = nn.Conv2D( + in_channels=self.nf * self.nframes, + out_channels=self.nf, + kernel_size=1, + stride=1, + padding=0) self.max_pool = nn.MaxPool2D(3, stride=2, padding=1) self.avg_pool = nn.AvgPool2D(3, stride=2, padding=1, exclusive=False) - self.sAtt_2 = nn.Conv2D(in_channels=2 * self.nf, - out_channels=self.nf, - kernel_size=1, - stride=1, - padding=0) - self.sAtt_3 = nn.Conv2D(in_channels=self.nf, - out_channels=self.nf, - kernel_size=3, - stride=1, - padding=1) + self.sAtt_2 = nn.Conv2D( + in_channels=2 * self.nf, + out_channels=self.nf, + kernel_size=1, + stride=1, + padding=0) + self.sAtt_3 = nn.Conv2D( + in_channels=self.nf, + out_channels=self.nf, + kernel_size=3, + stride=1, + padding=1) self.sAtt_4 = nn.Conv2D( in_channels=self.nf, out_channels=self.nf, kernel_size=1, stride=1, - padding=0, - ) - self.sAtt_5 = nn.Conv2D(in_channels=self.nf, - out_channels=self.nf, - kernel_size=3, - stride=1, - padding=1) - self.sAtt_add_1 = nn.Conv2D(in_channels=self.nf, - out_channels=self.nf, - kernel_size=1, - stride=1, - padding=0) - self.sAtt_add_2 = nn.Conv2D(in_channels=self.nf, - out_channels=self.nf, - kernel_size=1, - stride=1, - padding=0) - self.sAtt_L1 = nn.Conv2D(in_channels=self.nf, - out_channels=self.nf, - kernel_size=1, - stride=1, - padding=0) + padding=0, ) + self.sAtt_5 = nn.Conv2D( + in_channels=self.nf, + out_channels=self.nf, + kernel_size=3, + stride=1, + padding=1) + self.sAtt_add_1 = nn.Conv2D( + in_channels=self.nf, + out_channels=self.nf, + kernel_size=1, + stride=1, + padding=0) + self.sAtt_add_2 = nn.Conv2D( + in_channels=self.nf, + out_channels=self.nf, + kernel_size=1, + stride=1, + padding=0) + self.sAtt_L1 = nn.Conv2D( + in_channels=self.nf, + out_channels=self.nf, + kernel_size=1, + stride=1, + padding=0) self.sAtt_L2 = nn.Conv2D( in_channels=2 * self.nf, out_channels=self.nf, kernel_size=3, stride=1, - padding=1, - ) - self.sAtt_L3 = nn.Conv2D(in_channels=self.nf, - out_channels=self.nf, - kernel_size=3, - stride=1, - padding=1) - self.upsample = nn.Upsample(scale_factor=2, - mode="bilinear", - align_corners=False, - align_mode=0) + padding=1, ) + self.sAtt_L3 = nn.Conv2D( + in_channels=self.nf, + out_channels=self.nf, + kernel_size=3, + stride=1, + padding=1) + self.upsample = nn.Upsample( + scale_factor=2, mode="bilinear", align_corners=False, align_mode=0) def forward(self, aligned_fea): """ @@ -347,6 +361,7 @@ class DCNPack(nn.Layer): Ref: 
Delving Deep into Deformable Alignment in Video Super-Resolution. """ + def __init__(self, num_filters=64, kernel_size=3, @@ -361,23 +376,24 @@ def __init__(self, self.num_filters = num_filters if isinstance(kernel_size, int): self.kernel_size = [kernel_size, kernel_size] - self.conv_offset_mask = nn.Conv2D(in_channels=self.num_filters, - out_channels=self.deformable_groups * - 3 * self.kernel_size[0] * - self.kernel_size[1], - kernel_size=self.kernel_size, - stride=stride, - padding=padding) + self.conv_offset_mask = nn.Conv2D( + in_channels=self.num_filters, + out_channels=self.deformable_groups * 3 * self.kernel_size[0] * + self.kernel_size[1], + kernel_size=self.kernel_size, + stride=stride, + padding=padding) self.total_channels = self.deformable_groups * 3 * self.kernel_size[ 0] * self.kernel_size[1] self.split_channels = self.total_channels // 3 - self.dcn = DeformConv2D(in_channels=self.num_filters, - out_channels=self.num_filters, - kernel_size=self.kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - deformable_groups=self.deformable_groups) + self.dcn = DeformConv2D( + in_channels=self.num_filters, + out_channels=self.num_filters, + kernel_size=self.kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + deformable_groups=self.deformable_groups) self.sigmoid = nn.Sigmoid() # init conv offset constant_init(self.conv_offset_mask, 0., 0.) @@ -408,104 +424,103 @@ class PCDAlign(nn.Layer): nf (int): Channel number of middle features. Default: 64. groups (int): Deformable groups. Defaults: 8. """ + def __init__(self, nf=64, groups=8): super(PCDAlign, self).__init__() self.nf = nf self.groups = groups self.Leaky_relu = nn.LeakyReLU(negative_slope=0.1) - self.upsample = nn.Upsample(scale_factor=2, - mode="bilinear", - align_corners=False, - align_mode=0) + self.upsample = nn.Upsample( + scale_factor=2, mode="bilinear", align_corners=False, align_mode=0) # Pyramid has three levels: # L3: level 3, 1/4 spatial size # L2: level 2, 1/2 spatial size # L1: level 1, original spatial size # L3 - self.PCD_Align_L3_offset_conv1 = nn.Conv2D(in_channels=nf * 2, - out_channels=nf, - kernel_size=3, - stride=1, - padding=1) - self.PCD_Align_L3_offset_conv2 = nn.Conv2D(in_channels=nf, - out_channels=nf, - kernel_size=3, - stride=1, - padding=1) - self.PCD_Align_L3_dcn = DCNPack(num_filters=nf, - kernel_size=3, - stride=1, - padding=1, - deformable_groups=groups) + self.PCD_Align_L3_offset_conv1 = nn.Conv2D( + in_channels=nf * 2, + out_channels=nf, + kernel_size=3, + stride=1, + padding=1) + self.PCD_Align_L3_offset_conv2 = nn.Conv2D( + in_channels=nf, out_channels=nf, kernel_size=3, stride=1, padding=1) + self.PCD_Align_L3_dcn = DCNPack( + num_filters=nf, + kernel_size=3, + stride=1, + padding=1, + deformable_groups=groups) #L2 - self.PCD_Align_L2_offset_conv1 = nn.Conv2D(in_channels=nf * 2, - out_channels=nf, - kernel_size=3, - stride=1, - padding=1) - self.PCD_Align_L2_offset_conv2 = nn.Conv2D(in_channels=nf * 2, - out_channels=nf, - kernel_size=3, - stride=1, - padding=1) - self.PCD_Align_L2_offset_conv3 = nn.Conv2D(in_channels=nf, - out_channels=nf, - kernel_size=3, - stride=1, - padding=1) - self.PCD_Align_L2_dcn = DCNPack(num_filters=nf, - kernel_size=3, - stride=1, - padding=1, - deformable_groups=groups) - self.PCD_Align_L2_fea_conv = nn.Conv2D(in_channels=nf * 2, - out_channels=nf, - kernel_size=3, - stride=1, - padding=1) + self.PCD_Align_L2_offset_conv1 = nn.Conv2D( + in_channels=nf * 2, + out_channels=nf, + kernel_size=3, + stride=1, + padding=1) + 
self.PCD_Align_L2_offset_conv2 = nn.Conv2D( + in_channels=nf * 2, + out_channels=nf, + kernel_size=3, + stride=1, + padding=1) + self.PCD_Align_L2_offset_conv3 = nn.Conv2D( + in_channels=nf, out_channels=nf, kernel_size=3, stride=1, padding=1) + self.PCD_Align_L2_dcn = DCNPack( + num_filters=nf, + kernel_size=3, + stride=1, + padding=1, + deformable_groups=groups) + self.PCD_Align_L2_fea_conv = nn.Conv2D( + in_channels=nf * 2, + out_channels=nf, + kernel_size=3, + stride=1, + padding=1) #L1 - self.PCD_Align_L1_offset_conv1 = nn.Conv2D(in_channels=nf * 2, - out_channels=nf, - kernel_size=3, - stride=1, - padding=1) - self.PCD_Align_L1_offset_conv2 = nn.Conv2D(in_channels=nf * 2, - out_channels=nf, - kernel_size=3, - stride=1, - padding=1) - self.PCD_Align_L1_offset_conv3 = nn.Conv2D(in_channels=nf, - out_channels=nf, - kernel_size=3, - stride=1, - padding=1) - self.PCD_Align_L1_dcn = DCNPack(num_filters=nf, - kernel_size=3, - stride=1, - padding=1, - deformable_groups=groups) - self.PCD_Align_L1_fea_conv = nn.Conv2D(in_channels=nf * 2, - out_channels=nf, - kernel_size=3, - stride=1, - padding=1) + self.PCD_Align_L1_offset_conv1 = nn.Conv2D( + in_channels=nf * 2, + out_channels=nf, + kernel_size=3, + stride=1, + padding=1) + self.PCD_Align_L1_offset_conv2 = nn.Conv2D( + in_channels=nf * 2, + out_channels=nf, + kernel_size=3, + stride=1, + padding=1) + self.PCD_Align_L1_offset_conv3 = nn.Conv2D( + in_channels=nf, out_channels=nf, kernel_size=3, stride=1, padding=1) + self.PCD_Align_L1_dcn = DCNPack( + num_filters=nf, + kernel_size=3, + stride=1, + padding=1, + deformable_groups=groups) + self.PCD_Align_L1_fea_conv = nn.Conv2D( + in_channels=nf * 2, + out_channels=nf, + kernel_size=3, + stride=1, + padding=1) #cascade - self.PCD_Align_cas_offset_conv1 = nn.Conv2D(in_channels=nf * 2, - out_channels=nf, - kernel_size=3, - stride=1, - padding=1) - self.PCD_Align_cas_offset_conv2 = nn.Conv2D(in_channels=nf, - out_channels=nf, - kernel_size=3, - stride=1, - padding=1) - self.PCD_Align_cascade_dcn = DCNPack(num_filters=nf, - kernel_size=3, - stride=1, - padding=1, - deformable_groups=groups) + self.PCD_Align_cas_offset_conv1 = nn.Conv2D( + in_channels=nf * 2, + out_channels=nf, + kernel_size=3, + stride=1, + padding=1) + self.PCD_Align_cas_offset_conv2 = nn.Conv2D( + in_channels=nf, out_channels=nf, kernel_size=3, stride=1, padding=1) + self.PCD_Align_cascade_dcn = DCNPack( + num_filters=nf, + kernel_size=3, + stride=1, + padding=1, + deformable_groups=groups) def forward(self, nbr_fea_l, ref_fea_l): """Align neighboring frame features to the reference frame features. @@ -594,6 +609,7 @@ class EDVRNet(nn.Layer): with_tsa (bool): Whether has TSA module. Default: True. TSA_only (bool): Whether only use TSA module. Default: False. 
""" + def __init__(self, in_nf=3, out_nf=3, @@ -623,90 +639,94 @@ def __init__(self, self.Leaky_relu = nn.LeakyReLU(negative_slope=0.1) if self.predeblur: - self.pre_deblur = PredeblurResNetPyramid(in_nf=self.in_nf, - nf=self.nf, - HR_in=self.HR_in) - self.cov_1 = nn.Conv2D(in_channels=self.nf, - out_channels=self.nf, - kernel_size=1, - stride=1) + self.pre_deblur = PredeblurResNetPyramid( + in_nf=self.in_nf, nf=self.nf, HR_in=self.HR_in) + self.cov_1 = nn.Conv2D( + in_channels=self.nf, + out_channels=self.nf, + kernel_size=1, + stride=1) else: - self.conv_first = nn.Conv2D(in_channels=self.in_nf, - out_channels=self.nf, - kernel_size=3, - stride=1, - padding=1) + self.conv_first = nn.Conv2D( + in_channels=self.in_nf, + out_channels=self.nf, + kernel_size=3, + stride=1, + padding=1) #feature extraction module self.feature_extractor = MakeMultiBlocks(ResidualBlockNoBN, self.front_RBs, self.nf) - self.fea_L2_conv1 = nn.Conv2D(in_channels=self.nf, - out_channels=self.nf, - kernel_size=3, - stride=2, - padding=1) - self.fea_L2_conv2 = nn.Conv2D(in_channels=self.nf, - out_channels=self.nf, - kernel_size=3, - stride=1, - padding=1) + self.fea_L2_conv1 = nn.Conv2D( + in_channels=self.nf, + out_channels=self.nf, + kernel_size=3, + stride=2, + padding=1) + self.fea_L2_conv2 = nn.Conv2D( + in_channels=self.nf, + out_channels=self.nf, + kernel_size=3, + stride=1, + padding=1) self.fea_L3_conv1 = nn.Conv2D( in_channels=self.nf, out_channels=self.nf, kernel_size=3, stride=2, - padding=1, - ) - self.fea_L3_conv2 = nn.Conv2D(in_channels=self.nf, - out_channels=self.nf, - kernel_size=3, - stride=1, - padding=1) + padding=1, ) + self.fea_L3_conv2 = nn.Conv2D( + in_channels=self.nf, + out_channels=self.nf, + kernel_size=3, + stride=1, + padding=1) #PCD alignment module self.PCDModule = PCDAlign(nf=self.nf, groups=self.groups) #TSA Fusion module if self.w_TSA: - self.TSAModule = TSAFusion(nf=self.nf, - nframes=self.nframes, - center=self.center) + self.TSAModule = TSAFusion( + nf=self.nf, nframes=self.nframes, center=self.center) else: - self.TSAModule = nn.Conv2D(in_channels=self.nframes * self.nf, - out_channels=self.nf, - kernel_size=1, - stride=1) + self.TSAModule = nn.Conv2D( + in_channels=self.nframes * self.nf, + out_channels=self.nf, + kernel_size=1, + stride=1) #reconstruction module self.reconstructor = MakeMultiBlocks(ResidualBlockNoBN, self.back_RBs, self.nf) - self.upconv1 = nn.Conv2D(in_channels=self.nf, - out_channels=4 * self.nf, - kernel_size=3, - stride=1, - padding=1) + self.upconv1 = nn.Conv2D( + in_channels=self.nf, + out_channels=4 * self.nf, + kernel_size=3, + stride=1, + padding=1) self.pixel_shuffle = nn.PixelShuffle(2) - self.upconv2 = nn.Conv2D(in_channels=self.nf, - out_channels=4 * 64, - kernel_size=3, - stride=1, - padding=1) - self.HRconv = nn.Conv2D(in_channels=64, - out_channels=64, - kernel_size=3, - stride=1, - padding=1) - self.conv_last = nn.Conv2D(in_channels=64, - out_channels=self.out_nf, - kernel_size=3, - stride=1, - padding=1) + self.upconv2 = nn.Conv2D( + in_channels=self.nf, + out_channels=4 * 64, + kernel_size=3, + stride=1, + padding=1) + self.HRconv = nn.Conv2D( + in_channels=64, out_channels=64, kernel_size=3, stride=1, padding=1) + self.conv_last = nn.Conv2D( + in_channels=64, + out_channels=self.out_nf, + kernel_size=3, + stride=1, + padding=1) if self.scale_factor == 4: - self.upsample = nn.Upsample(scale_factor=self.scale_factor, - mode="bilinear", - align_corners=False, - align_mode=0) + self.upsample = nn.Upsample( + scale_factor=self.scale_factor, + 
mode="bilinear", + align_corners=False, + align_mode=0) def forward(self, x): """ @@ -753,8 +773,8 @@ def forward(self, x): aligned_fea = [] for i in range(N): nbr_fea_l = [ - L1_fea[:, i, :, :, :], L2_fea[:, i, :, :, :], L3_fea[:, - i, :, :, :] + L1_fea[:, i, :, :, :], L2_fea[:, i, :, :, :], + L3_fea[:, i, :, :, :] ] aligned_fea.append(self.PCDModule(nbr_fea_l, ref_fea_l)) diff --git a/paddlers/models/ppgan/models/generators/generater_animegan.py b/paddlers/models/ppgan/models/generators/generater_animegan.py index 2d3f3aa0..ca1cadb0 100644 --- a/paddlers/models/ppgan/models/generators/generater_animegan.py +++ b/paddlers/models/ppgan/models/generators/generater_animegan.py @@ -13,17 +13,18 @@ class Conv2DNormLReLU(nn.Layer): def __init__(self, in_channels: int, out_channels: int, - kernel_size: int = 3, - stride: int = 1, - padding: int = 1, + kernel_size: int=3, + stride: int=1, + padding: int=1, bias_attr=False) -> None: super().__init__() - self.conv = nn.Conv2D(in_channels, - out_channels, - kernel_size, - stride, - padding, - bias_attr=bias_attr) + self.conv = nn.Conv2D( + in_channels, + out_channels, + kernel_size, + stride, + padding, + bias_attr=bias_attr) # NOTE layer norm is crucial for animegan! self.norm = nn.GroupNorm(1, out_channels) self.lrelu = nn.LeakyReLU(0.2) @@ -39,9 +40,11 @@ class ResBlock(nn.Layer): def __init__(self, in_channels: int, out_channels: int) -> None: super().__init__() self.body = nn.Sequential( - Conv2DNormLReLU(in_channels, out_channels, 1, padding=0), + Conv2DNormLReLU( + in_channels, out_channels, 1, padding=0), Conv2DNormLReLU(out_channels, out_channels, 3), - nn.Conv2D(out_channels, out_channels // 2, 1, bias_attr=False)) + nn.Conv2D( + out_channels, out_channels // 2, 1, bias_attr=False)) def forward(self, x0): x = self.body(x0) @@ -61,28 +64,30 @@ def __init__(self, self.bottle_channels = round(self.expansion * self.in_channels) self.body = nn.Sequential( # pw - Conv2DNormLReLU(self.in_channels, - self.bottle_channels, - kernel_size=1, - bias_attr=bias_attr), + Conv2DNormLReLU( + self.in_channels, + self.bottle_channels, + kernel_size=1, + bias_attr=bias_attr), # dw - nn.Conv2D(self.bottle_channels, - self.bottle_channels, - kernel_size=3, - stride=1, - padding=0, - groups=self.bottle_channels, - bias_attr=True), + nn.Conv2D( + self.bottle_channels, + self.bottle_channels, + kernel_size=3, + stride=1, + padding=0, + groups=self.bottle_channels, + bias_attr=True), nn.GroupNorm(1, self.bottle_channels), nn.LeakyReLU(0.2), # pw & linear - nn.Conv2D(self.bottle_channels, - self.out_channels, - kernel_size=1, - padding=0, - bias_attr=False), - nn.GroupNorm(1, self.out_channels), - ) + nn.Conv2D( + self.bottle_channels, + self.out_channels, + kernel_size=1, + padding=0, + bias_attr=False), + nn.GroupNorm(1, self.out_channels), ) def forward(self, x0): x = self.body(x0) @@ -97,26 +102,41 @@ def forward(self, x0): class AnimeGeneratorLite(nn.Layer): def __init__(self) -> None: super().__init__() - self.A = nn.Sequential(Conv2DNormLReLU(3, 32, 7, padding=3), - Conv2DNormLReLU(32, 32, stride=2), - Conv2DNormLReLU(32, 32)) - - self.B = nn.Sequential(Conv2DNormLReLU(32, 64, stride=2), - Conv2DNormLReLU(64, 64), Conv2DNormLReLU(64, 64)) - - self.C = nn.Sequential(ResBlock(64, 128), ResBlock(64, 128), - ResBlock(64, 128), ResBlock(64, 128)) - - self.D = nn.Sequential(nn.Upsample(scale_factor=2, mode='bilinear'), - Conv2DNormLReLU(64, 64), Conv2DNormLReLU(64, 64), - Conv2DNormLReLU(64, 64)) - - self.E = nn.Sequential(nn.Upsample(scale_factor=2, 
mode='bilinear'), - Conv2DNormLReLU(64, 32), Conv2DNormLReLU(32, 32), - Conv2DNormLReLU(32, 32, 7, padding=3)) - - self.out = nn.Sequential(nn.Conv2D(32, 3, 1, bias_attr=False), - nn.Tanh()) + self.A = nn.Sequential( + Conv2DNormLReLU( + 3, 32, 7, padding=3), + Conv2DNormLReLU( + 32, 32, stride=2), + Conv2DNormLReLU(32, 32)) + + self.B = nn.Sequential( + Conv2DNormLReLU( + 32, 64, stride=2), + Conv2DNormLReLU(64, 64), + Conv2DNormLReLU(64, 64)) + + self.C = nn.Sequential( + ResBlock(64, 128), + ResBlock(64, 128), ResBlock(64, 128), ResBlock(64, 128)) + + self.D = nn.Sequential( + nn.Upsample( + scale_factor=2, mode='bilinear'), + Conv2DNormLReLU(64, 64), + Conv2DNormLReLU(64, 64), + Conv2DNormLReLU(64, 64)) + + self.E = nn.Sequential( + nn.Upsample( + scale_factor=2, mode='bilinear'), + Conv2DNormLReLU(64, 32), + Conv2DNormLReLU(32, 32), + Conv2DNormLReLU( + 32, 32, 7, padding=3)) + + self.out = nn.Sequential( + nn.Conv2D( + 32, 3, 1, bias_attr=False), nn.Tanh()) def forward(self, x): x = self.A(x) @@ -132,31 +152,42 @@ def forward(self, x): class AnimeGenerator(nn.Layer): def __init__(self) -> None: super().__init__() - self.A = nn.Sequential(Conv2DNormLReLU(3, 32, 7, padding=3), - Conv2DNormLReLU(32, 64, stride=2), - Conv2DNormLReLU(64, 64)) - - self.B = nn.Sequential(Conv2DNormLReLU(64, 128, stride=2), - Conv2DNormLReLU(128, 128), - Conv2DNormLReLU(128, 128)) - - self.C = nn.Sequential(InvertedresBlock(128, 2, 256), - InvertedresBlock(256, 2, 256), - InvertedresBlock(256, 2, 256), - InvertedresBlock(256, 2, 256), - Conv2DNormLReLU(256, 128)) - - self.D = nn.Sequential(nn.Upsample(scale_factor=2, mode='bilinear'), - Conv2DNormLReLU(128, 128), - Conv2DNormLReLU(128, 128)) - - self.E = nn.Sequential(nn.Upsample(scale_factor=2, mode='bilinear'), - Conv2DNormLReLU(128, 64), - Conv2DNormLReLU(64, 64), - Conv2DNormLReLU(64, 32, 7, padding=3)) - - self.out = nn.Sequential(nn.Conv2D(32, 3, 1, bias_attr=False), - nn.Tanh()) + self.A = nn.Sequential( + Conv2DNormLReLU( + 3, 32, 7, padding=3), + Conv2DNormLReLU( + 32, 64, stride=2), + Conv2DNormLReLU(64, 64)) + + self.B = nn.Sequential( + Conv2DNormLReLU( + 64, 128, stride=2), + Conv2DNormLReLU(128, 128), + Conv2DNormLReLU(128, 128)) + + self.C = nn.Sequential( + InvertedresBlock(128, 2, 256), + InvertedresBlock(256, 2, 256), + InvertedresBlock(256, 2, 256), + InvertedresBlock(256, 2, 256), Conv2DNormLReLU(256, 128)) + + self.D = nn.Sequential( + nn.Upsample( + scale_factor=2, mode='bilinear'), + Conv2DNormLReLU(128, 128), + Conv2DNormLReLU(128, 128)) + + self.E = nn.Sequential( + nn.Upsample( + scale_factor=2, mode='bilinear'), + Conv2DNormLReLU(128, 64), + Conv2DNormLReLU(64, 64), + Conv2DNormLReLU( + 64, 32, 7, padding=3)) + + self.out = nn.Sequential( + nn.Conv2D( + 32, 3, 1, bias_attr=False), nn.Tanh()) def forward(self, x): x = self.A(x) diff --git a/paddlers/models/ppgan/models/generators/generater_lapstyle.py b/paddlers/models/ppgan/models/generators/generater_lapstyle.py index 20108d7c..31b5b544 100644 --- a/paddlers/models/ppgan/models/generators/generater_lapstyle.py +++ b/paddlers/models/ppgan/models/generators/generater_lapstyle.py @@ -73,8 +73,8 @@ def adaptive_instance_normalization(content_feat, style_feat): style_mean, style_std = calc_mean_std(style_feat) content_mean, content_std = calc_mean_std(content_feat) - normalized_feat = (content_feat - - content_mean.expand(size)) / content_std.expand(size) + normalized_feat = ( + content_feat - content_mean.expand(size)) / content_std.expand(size) return normalized_feat * 
style_std.expand(size) + style_mean.expand(size) @@ -88,12 +88,17 @@ class ResnetBlock(nn.Layer): Args: dim (int): Channel number of intermediate features. """ + def __init__(self, dim): super(ResnetBlock, self).__init__() - self.conv_block = nn.Sequential(nn.Pad2D([1, 1, 1, 1], mode='reflect'), - nn.Conv2D(dim, dim, (3, 3)), nn.ReLU(), - nn.Pad2D([1, 1, 1, 1], mode='reflect'), - nn.Conv2D(dim, dim, (3, 3))) + self.conv_block = nn.Sequential( + nn.Pad2D( + [1, 1, 1, 1], mode='reflect'), + nn.Conv2D(dim, dim, (3, 3)), + nn.ReLU(), + nn.Pad2D( + [1, 1, 1, 1], mode='reflect'), + nn.Conv2D(dim, dim, (3, 3))) def forward(self, x): out = x + self.conv_block(x) @@ -110,11 +115,14 @@ class ConvBlock(nn.Layer): dim1 (int): Channel number of input features. dim2 (int): Channel number of output features. """ + def __init__(self, dim1, dim2): super(ConvBlock, self).__init__() - self.conv_block = nn.Sequential(nn.Pad2D([1, 1, 1, 1], mode='reflect'), - nn.Conv2D(dim1, dim2, (3, 3)), - nn.ReLU()) + self.conv_block = nn.Sequential( + nn.Pad2D( + [1, 1, 1, 1], mode='reflect'), + nn.Conv2D(dim1, dim2, (3, 3)), + nn.ReLU()) def forward(self, x): out = self.conv_block(x) @@ -128,6 +136,7 @@ class DecoderNet(nn.Layer): Drafting and Revision: Laplacian Pyramid Network for Fast High-Quality Artistic Style Transfer. """ + def __init__(self): super(DecoderNet, self).__init__() @@ -142,8 +151,9 @@ def __init__(self): self.convblock_11 = ConvBlock(64, 64) self.upsample = nn.Upsample(scale_factor=2, mode='nearest') - self.final_conv = nn.Sequential(nn.Pad2D([1, 1, 1, 1], mode='reflect'), - nn.Conv2D(64, 3, (3, 3))) + self.final_conv = nn.Sequential( + nn.Pad2D( + [1, 1, 1, 1], mode='reflect'), nn.Conv2D(64, 3, (3, 3))) def forward(self, cF, sF): @@ -167,8 +177,6 @@ def forward(self, cF, sF): return out - - @GENERATORS.register() class Encoder(nn.Layer): """Encoder of Drafting module. @@ -176,76 +184,97 @@ class Encoder(nn.Layer): Drafting and Revision: Laplacian Pyramid Network for Fast High-Quality Artistic Style Transfer. 
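    Features from this encoder feed the drafting DecoderNet, which matches
    content statistics to style statistics per channel via
    adaptive_instance_normalization (defined above):

        AdaIN(c, s) = sigma(s) * (c - mu(c)) / sigma(c) + mu(s)

    where mu and sigma are the per-channel mean and std from calc_mean_std.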
""" + def __init__(self): super(Encoder, self).__init__() vgg_net = nn.Sequential( nn.Conv2D(3, 3, (1, 1)), - nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Pad2D( + [1, 1, 1, 1], mode='reflect'), nn.Conv2D(3, 64, (3, 3)), nn.ReLU(), # relu1-1 - nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Pad2D( + [1, 1, 1, 1], mode='reflect'), nn.Conv2D(64, 64, (3, 3)), nn.ReLU(), # relu1-2 - nn.MaxPool2D((2, 2), (2, 2), (0, 0), ceil_mode=True), - nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.MaxPool2D( + (2, 2), (2, 2), (0, 0), ceil_mode=True), + nn.Pad2D( + [1, 1, 1, 1], mode='reflect'), nn.Conv2D(64, 128, (3, 3)), nn.ReLU(), # relu2-1 - nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Pad2D( + [1, 1, 1, 1], mode='reflect'), nn.Conv2D(128, 128, (3, 3)), nn.ReLU(), # relu2-2 - nn.MaxPool2D((2, 2), (2, 2), (0, 0), ceil_mode=True), - nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.MaxPool2D( + (2, 2), (2, 2), (0, 0), ceil_mode=True), + nn.Pad2D( + [1, 1, 1, 1], mode='reflect'), nn.Conv2D(128, 256, (3, 3)), nn.ReLU(), # relu3-1 - nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Pad2D( + [1, 1, 1, 1], mode='reflect'), nn.Conv2D(256, 256, (3, 3)), nn.ReLU(), # relu3-2 - nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Pad2D( + [1, 1, 1, 1], mode='reflect'), nn.Conv2D(256, 256, (3, 3)), nn.ReLU(), # relu3-3 - nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Pad2D( + [1, 1, 1, 1], mode='reflect'), nn.Conv2D(256, 256, (3, 3)), nn.ReLU(), # relu3-4 - nn.MaxPool2D((2, 2), (2, 2), (0, 0), ceil_mode=True), - nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.MaxPool2D( + (2, 2), (2, 2), (0, 0), ceil_mode=True), + nn.Pad2D( + [1, 1, 1, 1], mode='reflect'), nn.Conv2D(256, 512, (3, 3)), nn.ReLU(), # relu4-1, this is the last layer used - nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Pad2D( + [1, 1, 1, 1], mode='reflect'), nn.Conv2D(512, 512, (3, 3)), nn.ReLU(), # relu4-2 - nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Pad2D( + [1, 1, 1, 1], mode='reflect'), nn.Conv2D(512, 512, (3, 3)), nn.ReLU(), # relu4-3 - nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Pad2D( + [1, 1, 1, 1], mode='reflect'), nn.Conv2D(512, 512, (3, 3)), nn.ReLU(), # relu4-4 - nn.MaxPool2D((2, 2), (2, 2), (0, 0), ceil_mode=True), - nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.MaxPool2D( + (2, 2), (2, 2), (0, 0), ceil_mode=True), + nn.Pad2D( + [1, 1, 1, 1], mode='reflect'), nn.Conv2D(512, 512, (3, 3)), nn.ReLU(), # relu5-1 - nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Pad2D( + [1, 1, 1, 1], mode='reflect'), nn.Conv2D(512, 512, (3, 3)), nn.ReLU(), # relu5-2 - nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Pad2D( + [1, 1, 1, 1], mode='reflect'), nn.Conv2D(512, 512, (3, 3)), nn.ReLU(), # relu5-3 - nn.Pad2D([1, 1, 1, 1], mode='reflect'), + nn.Pad2D( + [1, 1, 1, 1], mode='reflect'), nn.Conv2D(512, 512, (3, 3)), nn.ReLU() # relu5-4 ) weight_path = get_path_from_url( 'https://paddlegan.bj.bcebos.com/models/vgg_normalised.pdparams') vgg_net.set_dict(paddle.load(weight_path)) - self.enc_1 = nn.Sequential(*list( - vgg_net.children())[:4]) # input -> relu1_1 - self.enc_2 = nn.Sequential(*list( - vgg_net.children())[4:11]) # relu1_1 -> relu2_1 - self.enc_3 = nn.Sequential(*list( - vgg_net.children())[11:18]) # relu2_1 -> relu3_1 - self.enc_4 = nn.Sequential(*list( - vgg_net.children())[18:31]) # relu3_1 -> relu4_1 - self.enc_5 = nn.Sequential(*list( - vgg_net.children())[31:44]) # relu4_1 -> relu5_1 + self.enc_1 = nn.Sequential( + *list(vgg_net.children())[:4]) # input -> relu1_1 + self.enc_2 = nn.Sequential( + *list(vgg_net.children())[4:11]) # relu1_1 -> relu2_1 + self.enc_3 = nn.Sequential( + 
*list(vgg_net.children())[11:18]) # relu2_1 -> relu3_1 + self.enc_4 = nn.Sequential( + *list(vgg_net.children())[18:31]) # relu3_1 -> relu4_1 + self.enc_5 = nn.Sequential( + *list(vgg_net.children())[31:44]) # relu4_1 -> relu5_1 def forward(self, x): out = {} @@ -269,32 +298,33 @@ class RevisionNet(nn.Layer): Drafting and Revision: Laplacian Pyramid Network for Fast High-Quality Artistic Style Transfer. """ + def __init__(self, input_nc=6): super(RevisionNet, self).__init__() DownBlock = [] DownBlock += [ - nn.Pad2D([1, 1, 1, 1], mode='reflect'), - nn.Conv2D(input_nc, 64, (3, 3)), + nn.Pad2D( + [1, 1, 1, 1], mode='reflect'), nn.Conv2D(input_nc, 64, (3, 3)), nn.ReLU() ] DownBlock += [ - nn.Pad2D([1, 1, 1, 1], mode='reflect'), - nn.Conv2D(64, 64, (3, 3), stride=2), - nn.ReLU() + nn.Pad2D( + [1, 1, 1, 1], mode='reflect'), nn.Conv2D( + 64, 64, (3, 3), stride=2), nn.ReLU() ] self.resblock = ResnetBlock(64) UpBlock = [] UpBlock += [ - nn.Upsample(scale_factor=2, mode='nearest'), - nn.Pad2D([1, 1, 1, 1], mode='reflect'), - nn.Conv2D(64, 64, (3, 3)), + nn.Upsample( + scale_factor=2, mode='nearest'), nn.Pad2D( + [1, 1, 1, 1], mode='reflect'), nn.Conv2D(64, 64, (3, 3)), nn.ReLU() ] UpBlock += [ - nn.Pad2D([1, 1, 1, 1], mode='reflect'), - nn.Conv2D(64, 3, (3, 3)) + nn.Pad2D( + [1, 1, 1, 1], mode='reflect'), nn.Conv2D(64, 3, (3, 3)) ] self.DownBlock = nn.Sequential(*DownBlock) diff --git a/paddlers/models/ppgan/models/generators/generater_photopen.py b/paddlers/models/ppgan/models/generators/generater_photopen.py index ecbc9073..24bffc5c 100644 --- a/paddlers/models/ppgan/models/generators/generater_photopen.py +++ b/paddlers/models/ppgan/models/generators/generater_photopen.py @@ -21,6 +21,7 @@ from ...utils.photopen import build_norm_layer, simam, Dict from .builder import GENERATORS + class SPADE(nn.Layer): def __init__(self, config_text, norm_nc, label_nc): super(SPADE, self).__init__() @@ -57,6 +58,7 @@ def forward(self, x, segmap): return out + class SPADEResnetBlock(nn.Layer): def __init__(self, fin, fout, opt): super(SPADEResnetBlock, self).__init__() @@ -64,7 +66,7 @@ def __init__(self, fin, fout, opt): # Attributes self.learned_shortcut = (fin != fout) fmiddle = min(fin, fout) - + # define spade layers spade_config_str = opt.norm_G.replace('spectral', '') self.spade_0 = SPADE(spade_config_str, fin, opt.semantic_nc) @@ -75,22 +77,22 @@ def __init__(self, fin, fout, opt): # define act_conv layers self.act_conv_0 = nn.Sequential(*[ nn.GELU(), - spectral_norm(nn.Conv2D(fin, fmiddle, 3, 1, 1, - weight_attr=None, - bias_attr=None)), - ]) + spectral_norm( + nn.Conv2D( + fin, fmiddle, 3, 1, 1, weight_attr=None, bias_attr=None)), + ]) self.act_conv_1 = nn.Sequential(*[ nn.GELU(), - spectral_norm(nn.Conv2D(fmiddle, fout, 3, 1, 1, - weight_attr=None, - bias_attr=None)), - ]) + spectral_norm( + nn.Conv2D( + fmiddle, fout, 3, 1, 1, weight_attr=None, bias_attr=None)), + ]) if self.learned_shortcut: self.act_conv_s = nn.Sequential(*[ - spectral_norm(nn.Conv2D(fin, fout, 1, 1, 0, bias_attr=False, - weight_attr=None)), - ]) - + spectral_norm( + nn.Conv2D( + fin, fout, 1, 1, 0, bias_attr=False, weight_attr=None)), + ]) def forward(self, x, seg): x_s = self.shortcut(x, seg) @@ -107,32 +109,33 @@ def shortcut(self, x, seg): x_s = x return x_s + @GENERATORS.register() class SPADEGenerator(nn.Layer): - def __init__(self, - ngf, - num_upsampling_layers, - crop_size, - aspect_ratio, - norm_G, - semantic_nc, - use_vae, - nef, - ): + def __init__( + self, + ngf, + num_upsampling_layers, + crop_size, + aspect_ratio, + 
norm_G, + semantic_nc, + use_vae, + nef, ): super(SPADEGenerator, self).__init__() - + opt = { - 'ngf': ngf, - 'num_upsampling_layers': num_upsampling_layers, - 'crop_size': crop_size, - 'aspect_ratio': aspect_ratio, - 'norm_G': norm_G, - 'semantic_nc': semantic_nc, - 'use_vae': use_vae, - 'nef': nef, - } + 'ngf': ngf, + 'num_upsampling_layers': num_upsampling_layers, + 'crop_size': crop_size, + 'aspect_ratio': aspect_ratio, + 'norm_G': norm_G, + 'semantic_nc': semantic_nc, + 'use_vae': use_vae, + 'nef': nef, + } self.opt = Dict(opt) - + nf = self.opt.ngf self.sw, self.sh = self.compute_latent_vector_size(self.opt) @@ -213,7 +216,8 @@ def compute_latent_vector_size(self, opt): sh = round(sw / opt.aspect_ratio) return sw, sh - + + class VAE_Encoder(nn.Layer): def __init__(self, opt): super(VAE_Encoder, self).__init__() @@ -224,31 +228,51 @@ def __init__(self, opt): InstanceNorm = build_norm_layer('instance') model = [ - spectral_norm(nn.Conv2D(3, ndf, kw, 2, pw, - weight_attr=None, - bias_attr=None)), + spectral_norm( + nn.Conv2D( + 3, ndf, kw, 2, pw, weight_attr=None, bias_attr=None)), InstanceNorm(ndf), - nn.GELU(), - spectral_norm(nn.Conv2D(ndf * 1, ndf * 2, kw, 2, pw, + spectral_norm( + nn.Conv2D( + ndf * 1, + ndf * 2, + kw, + 2, + pw, weight_attr=None, bias_attr=None)), InstanceNorm(ndf * 2), - nn.GELU(), - spectral_norm(nn.Conv2D(ndf * 2, ndf * 4, kw, 2, pw, + spectral_norm( + nn.Conv2D( + ndf * 2, + ndf * 4, + kw, + 2, + pw, weight_attr=None, bias_attr=None)), InstanceNorm(ndf * 4), - nn.GELU(), - spectral_norm(nn.Conv2D(ndf * 4, ndf * 8, kw, 2, pw, + spectral_norm( + nn.Conv2D( + ndf * 4, + ndf * 8, + kw, + 2, + pw, weight_attr=None, bias_attr=None)), InstanceNorm(ndf * 8), - nn.GELU(), - spectral_norm(nn.Conv2D(ndf * 8, ndf * 8, kw, 2, pw, + spectral_norm( + nn.Conv2D( + ndf * 8, + ndf * 8, + kw, + 2, + pw, weight_attr=None, bias_attr=None)), InstanceNorm(ndf * 8), @@ -256,12 +280,18 @@ def __init__(self, opt): if opt.crop_size >= 256: model += [ nn.GELU(), - spectral_norm(nn.Conv2D(ndf * 8, ndf * 8, kw, 2, pw, + spectral_norm( + nn.Conv2D( + ndf * 8, + ndf * 8, + kw, + 2, + pw, weight_attr=None, bias_attr=None)), InstanceNorm(ndf * 8), ] - model += [nn.GELU(),] + model += [nn.GELU(), ] self.flatten = nn.Flatten(1, -1) self.so = 4 @@ -272,8 +302,7 @@ def __init__(self, opt): def forward(self, x): x = self.model(x) - + x = self.flatten(x) return self.fc_mu(x), self.fc_var(x) - diff --git a/paddlers/models/ppgan/models/generators/generator_firstorder.py b/paddlers/models/ppgan/models/generators/generator_firstorder.py index d89beb4e..76151b72 100644 --- a/paddlers/models/ppgan/models/generators/generator_firstorder.py +++ b/paddlers/models/ppgan/models/generators/generator_firstorder.py @@ -42,6 +42,7 @@ class FirstOrderGenerator(nn.Layer): equivariance_value, equivariance_jacobian] """ + def __init__(self, generator_cfg, kp_detector_cfg, common_params, train_params, dis_scales): super(FirstOrderGenerator, self).__init__() @@ -59,9 +60,8 @@ def __init__(self, generator_cfg, kp_detector_cfg, common_params, def forward(self, x, discriminator, kp_extractor_ori=None): kp_source = self.kp_extractor(x['source']) kp_driving = self.kp_extractor(x['driving']) - generated = self.generator(x['source'], - kp_source=kp_source, - kp_driving=kp_driving) + generated = self.generator( + x['source'], kp_source=kp_source, kp_driving=kp_driving) generated.update({'kp_source': kp_source, 'kp_driving': kp_driving}) loss_values = {} @@ -84,8 +84,8 @@ def forward(self, x, discriminator, 
kp_extractor_ori=None): if self.loss_weights['generator_gan'] != 0: discriminator_maps_generated = discriminator( pyramide_generated, kp=detach_kp(kp_driving)) - discriminator_maps_real = discriminator(pyramide_real, - kp=detach_kp(kp_driving)) + discriminator_maps_real = discriminator( + pyramide_real, kp=detach_kp(kp_driving)) value_total = 0 for scale in self.disc_scales: key = 'prediction_map_%s' % scale @@ -118,17 +118,17 @@ def forward(self, x, discriminator, kp_extractor_ori=None): # Value loss part if self.loss_weights['equivariance_value'] != 0: - value = paddle.abs( - kp_driving['value'] - - transform.warp_coordinates(transformed_kp['value'])).mean() + value = paddle.abs(kp_driving['value'] - + transform.warp_coordinates(transformed_kp[ + 'value'])).mean() loss_values['equivariance_value'] = self.loss_weights[ 'equivariance_value'] * value # jacobian loss part if self.loss_weights['equivariance_jacobian'] != 0: - jacobian_transformed = paddle.matmul( - *broadcast(transform.jacobian(transformed_kp['value']), - transformed_kp['jacobian'])) + jacobian_transformed = paddle.matmul(*broadcast( + transform.jacobian(transformed_kp['value']), + transformed_kp['jacobian'])) normed_driving = paddle.inverse(kp_driving['jacobian']) normed_transformed = jacobian_transformed value = paddle.matmul( @@ -159,6 +159,7 @@ class VGG19(nn.Layer): """ Vgg19 network for perceptual loss. See Sec 3.3. """ + def __init__(self, requires_grad=False): super(VGG19, self).__init__() pretrained_url = 'https://paddlegan.bj.bcebos.com/models/vgg19.pdparams' @@ -209,10 +210,10 @@ class Transform: """ Random tps transformation for equivariance constraints. See Sec 3.3 """ + def __init__(self, bs, **kwargs): - noise = paddle.distribution.Normal(loc=[0], - scale=[kwargs['sigma_affine'] - ]).sample([bs, 2, 3]) + noise = paddle.distribution.Normal( + loc=[0], scale=[kwargs['sigma_affine']]).sample([bs, 2, 3]) noise = noise.reshape((bs, 2, 3)) self.theta = noise + paddle.tensor.eye(2, 3, dtype='float32').reshape( (1, 2, 3)) @@ -234,11 +235,12 @@ def transform_frame(self, frame): grid = grid.reshape((1, frame.shape[2] * frame.shape[3], 2)) grid = self.warp_coordinates(grid).reshape( (self.bs, frame.shape[2], frame.shape[3], 2)) - return F.grid_sample(frame, - grid, - mode='bilinear', - padding_mode='reflection', - align_corners=True) + return F.grid_sample( + frame, + grid, + mode='bilinear', + padding_mode='reflection', + align_corners=True) def warp_coordinates(self, coordinates): theta = self.theta.astype('float32') @@ -251,8 +253,8 @@ def warp_coordinates(self, coordinates): theta_part_a = theta[:, :, :, :2] theta_part_b = theta[:, :, :, 2:] - transformed = paddle.fluid.layers.matmul( - *broadcast(theta_part_a, coordinates)) + theta_part_b #M*p + m0 + transformed = paddle.fluid.layers.matmul(*broadcast( + theta_part_a, coordinates)) + theta_part_b #M*p + m0 transformed = transformed.squeeze(-1) if self.tps: control_points = self.control_points.astype('float32') @@ -272,12 +274,10 @@ def warp_coordinates(self, coordinates): def jacobian(self, coordinates): new_coordinates = self.warp_coordinates(coordinates) assert len(new_coordinates.shape) == 3 - grad_x = paddle.grad(new_coordinates[:, :, 0].sum(), - coordinates, - create_graph=True) - grad_y = paddle.grad(new_coordinates[:, :, 1].sum(), - coordinates, - create_graph=True) + grad_x = paddle.grad( + new_coordinates[:, :, 0].sum(), coordinates, create_graph=True) + grad_y = paddle.grad( + new_coordinates[:, :, 1].sum(), coordinates, create_graph=True) jacobian = 
paddle.concat( [grad_x[0].unsqueeze(-2), grad_y[0].unsqueeze(-2)], axis=-2) return jacobian diff --git a/paddlers/models/ppgan/models/generators/generator_starganv2.py b/paddlers/models/ppgan/models/generators/generator_starganv2.py index 7d39de59..86b28c4d 100644 --- a/paddlers/models/ppgan/models/generators/generator_starganv2.py +++ b/paddlers/models/ppgan/models/generators/generator_starganv2.py @@ -21,6 +21,7 @@ class AvgPool2D(nn.Layer): Peplace avg_pool2d because paddle.grad will cause avg_pool2d to report an error when training. In the future Paddle framework will supports avg_pool2d and remove this class. """ + def __init__(self): super(AvgPool2D, self).__init__() self.filter = paddle.to_tensor([[1, 1], [1, 1]], dtype='float32') @@ -49,12 +50,10 @@ def _build_weights(self, dim_in, dim_out): self.conv1 = nn.Conv2D(dim_in, dim_in, 3, 1, 1) self.conv2 = nn.Conv2D(dim_in, dim_out, 3, 1, 1) if self.normalize: - self.norm1 = nn.InstanceNorm2D(dim_in, - weight_attr=True, - bias_attr=True) - self.norm2 = nn.InstanceNorm2D(dim_in, - weight_attr=True, - bias_attr=True) + self.norm1 = nn.InstanceNorm2D( + dim_in, weight_attr=True, bias_attr=True) + self.norm2 = nn.InstanceNorm2D( + dim_in, weight_attr=True, bias_attr=True) if self.learned_sc: self.conv1x1 = nn.Conv2D(dim_in, dim_out, 1, 1, 0, bias_attr=False) @@ -86,9 +85,8 @@ def forward(self, x): class AdaIN(nn.Layer): def __init__(self, style_dim, num_features): super().__init__() - self.norm = nn.InstanceNorm2D(num_features, - weight_attr=False, - bias_attr=False) + self.norm = nn.InstanceNorm2D( + num_features, weight_attr=False, bias_attr=False) self.fc = nn.Linear(style_dim, num_features * 2) def forward(self, x, s): @@ -170,8 +168,10 @@ def __init__(self, img_size=256, style_dim=64, max_conv_dim=512, w_hpf=1): self.encode = nn.LayerList() self.decode = nn.LayerList() self.to_rgb = nn.Sequential( - nn.InstanceNorm2D(dim_in, weight_attr=True, bias_attr=True), - nn.LeakyReLU(0.2), nn.Conv2D(dim_in, 3, 1, 1, 0)) + nn.InstanceNorm2D( + dim_in, weight_attr=True, bias_attr=True), + nn.LeakyReLU(0.2), + nn.Conv2D(dim_in, 3, 1, 1, 0)) # down/up-sampling blocks repeat_num = int(np.log2(img_size)) - 4 @@ -180,28 +180,26 @@ def __init__(self, img_size=256, style_dim=64, max_conv_dim=512, w_hpf=1): for _ in range(repeat_num): dim_out = min(dim_in * 2, max_conv_dim) self.encode.append( - ResBlk(dim_in, dim_out, normalize=True, downsample=True)) + ResBlk( + dim_in, dim_out, normalize=True, downsample=True)) if len(self.decode) == 0: self.decode.append( - AdainResBlk(dim_out, - dim_in, - style_dim, - w_hpf=w_hpf, - upsample=True)) + AdainResBlk( + dim_out, dim_in, style_dim, w_hpf=w_hpf, upsample=True)) else: - self.decode.insert(0, - AdainResBlk(dim_out, - dim_in, - style_dim, - w_hpf=w_hpf, - upsample=True)) # stack-like + self.decode.insert( + 0, + AdainResBlk( + dim_out, dim_in, style_dim, w_hpf=w_hpf, + upsample=True)) # stack-like dim_in = dim_out # bottleneck blocks for _ in range(2): self.encode.append(ResBlk(dim_out, dim_out, normalize=True)) self.decode.insert( - 0, AdainResBlk(dim_out, dim_out, style_dim, w_hpf=w_hpf)) + 0, AdainResBlk( + dim_out, dim_out, style_dim, w_hpf=w_hpf)) if w_hpf > 0: self.hpf = HighPass(w_hpf) @@ -217,9 +215,8 @@ def forward(self, x, s, masks=None): x = block(x, s) if (masks is not None) and (x.shape[2] in [32, 64, 128]): mask = masks[0] if x.shape[2] in [32] else masks[1] - mask = F.interpolate(mask, - size=[x.shape[2], x.shape[2]], - mode='bilinear') + mask = F.interpolate( + mask, size=[x.shape[2], 
x.shape[2]], mode='bilinear') x = x + self.hpf(mask * cache[x.shape[2]]) return self.to_rgb(x) @@ -239,10 +236,12 @@ def __init__(self, latent_dim=16, style_dim=64, num_domains=2): self.unshared = nn.LayerList() for _ in range(num_domains): self.unshared.append( - nn.Sequential(nn.Linear(512, 512), - nn.ReLU(), nn.Linear(512, 512), nn.ReLU(), - nn.Linear(512, 512), nn.ReLU(), - nn.Linear(512, style_dim))) + nn.Sequential( + nn.Linear(512, 512), + nn.ReLU(), + nn.Linear(512, 512), + nn.ReLU(), + nn.Linear(512, 512), nn.ReLU(), nn.Linear(512, style_dim))) def forward(self, z, y): h = self.shared(z) @@ -254,8 +253,8 @@ def forward(self, z, y): s = [] for i in range(idx.shape[0]): s += [ - out[idx[i].numpy().astype(np.int).tolist()[0], - y[i].numpy().astype(np.int).tolist()[0]] + out[idx[i].numpy().astype(np.int).tolist()[0], y[i].numpy() + .astype(np.int).tolist()[0]] ] s = paddle.stack(s) s = paddle.reshape(s, (s.shape[0], -1)) @@ -300,8 +299,8 @@ def forward(self, x, y): s = [] for i in range(idx.shape[0]): s += [ - out[idx[i].numpy().astype(np.int).tolist()[0], - y[i].numpy().astype(np.int).tolist()[0]] + out[idx[i].numpy().astype(np.int).tolist()[0], y[i].numpy() + .astype(np.int).tolist()[0]] ] s = paddle.stack(s) s = paddle.reshape(s, (s.shape[0], -1)) @@ -320,15 +319,16 @@ def __init__(self, self.end_relu = end_relu # Base part - self.conv1 = CoordConvTh(256, - 256, - True, - False, - in_channels=3, - out_channels=64, - kernel_size=7, - stride=2, - padding=3) + self.conv1 = CoordConvTh( + 256, + 256, + True, + False, + in_channels=3, + out_channels=64, + kernel_size=7, + stride=2, + padding=3) self.bn1 = nn.BatchNorm2D(64) self.conv2 = ConvBlock(64, 128) self.conv3 = ConvBlock(128, 128) @@ -395,9 +395,10 @@ def get_heatmap(self, x, b_preprocess=True): heatmaps = outputs[-1][:, :-1, :, :] scale_factor = x.shape[2] // heatmaps.shape[2] if b_preprocess: - heatmaps = F.interpolate(heatmaps, - scale_factor=scale_factor, - mode='bilinear', - align_corners=True) + heatmaps = F.interpolate( + heatmaps, + scale_factor=scale_factor, + mode='bilinear', + align_corners=True) heatmaps = preprocess(heatmaps) return heatmaps diff --git a/paddlers/models/ppgan/models/generators/iconvsr.py b/paddlers/models/ppgan/models/generators/iconvsr.py index f97931be..4b7b57b4 100644 --- a/paddlers/models/ppgan/models/generators/iconvsr.py +++ b/paddlers/models/ppgan/models/generators/iconvsr.py @@ -45,6 +45,7 @@ class IconVSR(nn.Layer): then the (0, 5, 10, 15, ...)-th frame will be the keyframes. Default: 5. 
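        Keyframe schedule, illustratively (assuming the default padding=2):
        with keyframe_stride=5, frames 0, 5, 10, ... are re-filled with
        features from a (padding * 2 + 1)-frame EDVR window, while all other
        frames rely on recurrent propagation alone.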
""" + def __init__(self, mid_channels=64, num_blocks=30, @@ -64,25 +65,17 @@ def __init__(self, self.spynet.set_state_dict(paddle.load(weight_path)) # information-refill - self.edvr = EDVRFeatureExtractor(num_frames=padding * 2 + 1, - center_frame_idx=padding) + self.edvr = EDVRFeatureExtractor( + num_frames=padding * 2 + 1, center_frame_idx=padding) edvr_wight_path = get_path_from_url( 'https://paddlegan.bj.bcebos.com/models/edvrm.pdparams') self.edvr.set_state_dict(paddle.load(edvr_wight_path)) - self.backward_fusion = nn.Conv2D(2 * mid_channels, - mid_channels, - 3, - 1, - 1, - bias_attr=True) - self.forward_fusion = nn.Conv2D(2 * mid_channels, - mid_channels, - 3, - 1, - 1, - bias_attr=True) + self.backward_fusion = nn.Conv2D( + 2 * mid_channels, mid_channels, 3, 1, 1, bias_attr=True) + self.forward_fusion = nn.Conv2D( + 2 * mid_channels, mid_channels, 3, 1, 1, bias_attr=True) # propagation branches self.backward_resblocks = ResidualBlocksWithInputConv( @@ -92,19 +85,14 @@ def __init__(self, # upsample # self.fusion = nn.Conv2D(mid_channels * 2, mid_channels, 1, 1, 0) - self.upsample1 = PixelShufflePack(mid_channels, - mid_channels, - 2, - upsample_kernel=3) - self.upsample2 = PixelShufflePack(mid_channels, - 64, - 2, - upsample_kernel=3) + self.upsample1 = PixelShufflePack( + mid_channels, mid_channels, 2, upsample_kernel=3) + self.upsample2 = PixelShufflePack( + mid_channels, 64, 2, upsample_kernel=3) self.conv_hr = nn.Conv2D(64, 64, 3, 1, 1) self.conv_last = nn.Conv2D(64, 3, 3, 1, 1) - self.img_upsample = nn.Upsample(scale_factor=4, - mode='bilinear', - align_corners=False) + self.img_upsample = nn.Upsample( + scale_factor=4, mode='bilinear', align_corners=False) # activation function self.lrelu = nn.LeakyReLU(negative_slope=0.1) @@ -319,6 +307,7 @@ class EDVRFeatureExtractor(nn.Layer): 0. Default: 2. with_tsa (bool): Whether to use TSA module. Default: True. 
""" + def __init__(self, in_channels=3, out_channel=3, @@ -336,9 +325,8 @@ def __init__(self, self.with_tsa = with_tsa self.conv_first = nn.Conv2D(in_channels, mid_channels, 3, 1, 1) - self.feature_extraction = make_layer(ResidualBlockNoBN, - num_blocks_extraction, - nf=mid_channels) + self.feature_extraction = make_layer( + ResidualBlockNoBN, num_blocks_extraction, nf=mid_channels) # generate pyramid features self.feat_l2_conv1 = nn.Conv2D(mid_channels, mid_channels, 3, 2, 1) @@ -350,9 +338,10 @@ def __init__(self, self.pcd_alignment = PCDAlign(nf=mid_channels, groups=deform_groups) # fusion if self.with_tsa: - self.fusion = TSAFusion(nf=mid_channels, - nframes=num_frames, - center=self.center_frame_idx) + self.fusion = TSAFusion( + nf=mid_channels, + nframes=num_frames, + center=self.center_frame_idx) else: self.fusion = nn.Conv2D(num_frames * mid_channels, mid_channels, 1, 1) diff --git a/paddlers/models/ppgan/models/generators/lesrcnn.py b/paddlers/models/ppgan/models/generators/lesrcnn.py index 7bb2a4e9..890bc567 100644 --- a/paddlers/models/ppgan/models/generators/lesrcnn.py +++ b/paddlers/models/ppgan/models/generators/lesrcnn.py @@ -63,12 +63,14 @@ def __init__(self, n_channels, scale, group=1): if scale == 2 or scale == 4 or scale == 8: for _ in range(int(math.log(scale, 2))): modules += [ - nn.Conv2D(n_channels, 4 * n_channels, 3, 1, 1, groups=group) + nn.Conv2D( + n_channels, 4 * n_channels, 3, 1, 1, groups=group) ] modules += [nn.PixelShuffle(2)] elif scale == 3: modules += [ - nn.Conv2D(n_channels, 9 * n_channels, 3, 1, 1, groups=group) + nn.Conv2D( + n_channels, 9 * n_channels, 3, 1, 1, groups=group) ] modules += [nn.PixelShuffle(3)] @@ -89,12 +91,12 @@ class LESRCNNGenerator(nn.Layer): multi_scale (bool): Whether to train multi scale model. group (int): group option for convolution. 
""" + def __init__( - self, - scale=4, - multi_scale=False, - group=1, - ): + self, + scale=4, + multi_scale=False, + group=1, ): super(LESRCNNGenerator, self).__init__() kernel_size = 3 @@ -110,165 +112,197 @@ def __init__( self.add_mean = MeanShift((0.4488, 0.4371, 0.4040), sub=False) self.conv1 = nn.Sequential( - nn.Conv2D(in_channels=channels, - out_channels=features, - kernel_size=kernel_size, - padding=padding, - groups=1, - bias_attr=False)) + nn.Conv2D( + in_channels=channels, + out_channels=features, + kernel_size=kernel_size, + padding=padding, + groups=1, + bias_attr=False)) self.conv2 = nn.Sequential( - nn.Conv2D(in_channels=features, - out_channels=features, - kernel_size=kernel_size, - padding=1, - groups=1, - bias_attr=False), nn.ReLU()) + nn.Conv2D( + in_channels=features, + out_channels=features, + kernel_size=kernel_size, + padding=1, + groups=1, + bias_attr=False), + nn.ReLU()) self.conv3 = nn.Sequential( - nn.Conv2D(in_channels=features, - out_channels=features, - kernel_size=kernel_size1, - padding=0, - groups=groups, - bias_attr=False)) + nn.Conv2D( + in_channels=features, + out_channels=features, + kernel_size=kernel_size1, + padding=0, + groups=groups, + bias_attr=False)) self.conv4 = nn.Sequential( - nn.Conv2D(in_channels=features, - out_channels=features, - kernel_size=kernel_size, - padding=1, - groups=1, - bias_attr=False), nn.ReLU()) + nn.Conv2D( + in_channels=features, + out_channels=features, + kernel_size=kernel_size, + padding=1, + groups=1, + bias_attr=False), + nn.ReLU()) self.conv5 = nn.Sequential( - nn.Conv2D(in_channels=features, - out_channels=features, - kernel_size=kernel_size1, - padding=0, - groups=groups, - bias_attr=False)) + nn.Conv2D( + in_channels=features, + out_channels=features, + kernel_size=kernel_size1, + padding=0, + groups=groups, + bias_attr=False)) self.conv6 = nn.Sequential( - nn.Conv2D(in_channels=features, - out_channels=features, - kernel_size=kernel_size, - padding=1, - groups=1, - bias_attr=False), nn.ReLU()) + nn.Conv2D( + in_channels=features, + out_channels=features, + kernel_size=kernel_size, + padding=1, + groups=1, + bias_attr=False), + nn.ReLU()) self.conv7 = nn.Sequential( - nn.Conv2D(in_channels=features, - out_channels=features, - kernel_size=kernel_size1, - padding=0, - groups=groups, - bias_attr=False)) + nn.Conv2D( + in_channels=features, + out_channels=features, + kernel_size=kernel_size1, + padding=0, + groups=groups, + bias_attr=False)) self.conv8 = nn.Sequential( - nn.Conv2D(in_channels=features, - out_channels=features, - kernel_size=kernel_size, - padding=1, - groups=1, - bias_attr=False), nn.ReLU()) + nn.Conv2D( + in_channels=features, + out_channels=features, + kernel_size=kernel_size, + padding=1, + groups=1, + bias_attr=False), + nn.ReLU()) self.conv9 = nn.Sequential( - nn.Conv2D(in_channels=features, - out_channels=features, - kernel_size=kernel_size1, - padding=0, - groups=groups, - bias_attr=False)) + nn.Conv2D( + in_channels=features, + out_channels=features, + kernel_size=kernel_size1, + padding=0, + groups=groups, + bias_attr=False)) self.conv10 = nn.Sequential( - nn.Conv2D(in_channels=features, - out_channels=features, - kernel_size=kernel_size, - padding=1, - groups=1, - bias_attr=False), nn.ReLU()) + nn.Conv2D( + in_channels=features, + out_channels=features, + kernel_size=kernel_size, + padding=1, + groups=1, + bias_attr=False), + nn.ReLU()) self.conv11 = nn.Sequential( - nn.Conv2D(in_channels=features, - out_channels=features, - kernel_size=kernel_size1, - padding=0, - groups=groups, - 
bias_attr=False)) + nn.Conv2D( + in_channels=features, + out_channels=features, + kernel_size=kernel_size1, + padding=0, + groups=groups, + bias_attr=False)) self.conv12 = nn.Sequential( - nn.Conv2D(in_channels=features, - out_channels=features, - kernel_size=kernel_size, - padding=1, - groups=1, - bias_attr=False), nn.ReLU()) + nn.Conv2D( + in_channels=features, + out_channels=features, + kernel_size=kernel_size, + padding=1, + groups=1, + bias_attr=False), + nn.ReLU()) self.conv13 = nn.Sequential( - nn.Conv2D(in_channels=features, - out_channels=features, - kernel_size=kernel_size1, - padding=0, - groups=groups, - bias_attr=False)) + nn.Conv2D( + in_channels=features, + out_channels=features, + kernel_size=kernel_size1, + padding=0, + groups=groups, + bias_attr=False)) self.conv14 = nn.Sequential( - nn.Conv2D(in_channels=features, - out_channels=features, - kernel_size=kernel_size, - padding=1, - groups=1, - bias_attr=False), nn.ReLU()) + nn.Conv2D( + in_channels=features, + out_channels=features, + kernel_size=kernel_size, + padding=1, + groups=1, + bias_attr=False), + nn.ReLU()) self.conv15 = nn.Sequential( - nn.Conv2D(in_channels=features, - out_channels=features, - kernel_size=kernel_size1, - padding=0, - groups=groups, - bias_attr=False)) + nn.Conv2D( + in_channels=features, + out_channels=features, + kernel_size=kernel_size1, + padding=0, + groups=groups, + bias_attr=False)) self.conv16 = nn.Sequential( - nn.Conv2D(in_channels=features, - out_channels=features, - kernel_size=kernel_size, - padding=1, - groups=1, - bias_attr=False), nn.ReLU()) + nn.Conv2D( + in_channels=features, + out_channels=features, + kernel_size=kernel_size, + padding=1, + groups=1, + bias_attr=False), + nn.ReLU()) self.conv17 = nn.Sequential( - nn.Conv2D(in_channels=features, - out_channels=features, - kernel_size=kernel_size1, - padding=0, - groups=groups, - bias_attr=False)) + nn.Conv2D( + in_channels=features, + out_channels=features, + kernel_size=kernel_size1, + padding=0, + groups=groups, + bias_attr=False)) self.conv17_1 = nn.Sequential( - nn.Conv2D(in_channels=features, - out_channels=features, - kernel_size=kernel_size, - padding=1, - groups=1, - bias_attr=False), nn.ReLU()) + nn.Conv2D( + in_channels=features, + out_channels=features, + kernel_size=kernel_size, + padding=1, + groups=1, + bias_attr=False), + nn.ReLU()) self.conv17_2 = nn.Sequential( - nn.Conv2D(in_channels=features, - out_channels=features, - kernel_size=kernel_size, - padding=1, - groups=1, - bias_attr=False), nn.ReLU()) + nn.Conv2D( + in_channels=features, + out_channels=features, + kernel_size=kernel_size, + padding=1, + groups=1, + bias_attr=False), + nn.ReLU()) self.conv17_3 = nn.Sequential( - nn.Conv2D(in_channels=features, - out_channels=features, - kernel_size=kernel_size, - padding=1, - groups=1, - bias_attr=False), nn.ReLU()) + nn.Conv2D( + in_channels=features, + out_channels=features, + kernel_size=kernel_size, + padding=1, + groups=1, + bias_attr=False), + nn.ReLU()) self.conv17_4 = nn.Sequential( - nn.Conv2D(in_channels=features, - out_channels=features, - kernel_size=kernel_size, - padding=1, - groups=1, - bias_attr=False), nn.ReLU()) + nn.Conv2D( + in_channels=features, + out_channels=features, + kernel_size=kernel_size, + padding=1, + groups=1, + bias_attr=False), + nn.ReLU()) self.conv18 = nn.Sequential( - nn.Conv2D(in_channels=features, - out_channels=3, - kernel_size=kernel_size, - padding=padding, - groups=groups, - bias_attr=False)) + nn.Conv2D( + in_channels=features, + out_channels=3, + 
kernel_size=kernel_size, + padding=padding, + groups=groups, + bias_attr=False)) self.ReLU = nn.ReLU() - self.upsample = UpsampleBlock(64, - scale=scale, - multi_scale=multi_scale, - group=1) + self.upsample = UpsampleBlock( + 64, scale=scale, multi_scale=multi_scale, group=1) def forward(self, x, scale=None): if scale is None: diff --git a/paddlers/models/ppgan/models/generators/mobile_resnet.py b/paddlers/models/ppgan/models/generators/mobile_resnet.py index 6b178ab1..4c0dc9de 100644 --- a/paddlers/models/ppgan/models/generators/mobile_resnet.py +++ b/paddlers/models/ppgan/models/generators/mobile_resnet.py @@ -37,54 +37,52 @@ def __init__(self, use_bias = norm_layer == nn.InstanceNorm2D self.model = nn.LayerList([ - nn.ReflectionPad2d([3, 3, 3, 3]), - nn.Conv2D(input_channel, - int(ngf), - kernel_size=7, - padding=0, - bias_attr=use_bias), - norm_layer(ngf), - nn.ReLU() + nn.ReflectionPad2d([3, 3, 3, 3]), nn.Conv2D( + input_channel, + int(ngf), + kernel_size=7, + padding=0, + bias_attr=use_bias), norm_layer(ngf), nn.ReLU() ]) n_downsampling = 2 for i in range(n_downsampling): mult = 2**i self.model.extend([ - nn.Conv2D(ngf * mult, - ngf * mult * 2, - kernel_size=3, - stride=2, - padding=1, - bias_attr=use_bias), - norm_layer(ngf * mult * 2), - nn.ReLU() + nn.Conv2D( + ngf * mult, + ngf * mult * 2, + kernel_size=3, + stride=2, + padding=1, + bias_attr=use_bias), norm_layer(ngf * mult * 2), nn.ReLU() ]) mult = 2**n_downsampling for i in range(n_blocks): self.model.extend([ - MobileResnetBlock(ngf * mult, - ngf * mult, - padding_type=padding_type, - norm_layer=norm_layer, - use_dropout=use_dropout, - use_bias=use_bias) + MobileResnetBlock( + ngf * mult, + ngf * mult, + padding_type=padding_type, + norm_layer=norm_layer, + use_dropout=use_dropout, + use_bias=use_bias) ]) for i in range(n_downsampling): mult = 2**(n_downsampling - i) output_size = (i + 1) * 128 self.model.extend([ - nn.Conv2DTranspose(ngf * mult, - int(ngf * mult / 2), - kernel_size=3, - stride=2, - padding=1, - output_padding=1, - bias_attr=use_bias), - norm_layer(int(ngf * mult / 2)), + nn.Conv2DTranspose( + ngf * mult, + int(ngf * mult / 2), + kernel_size=3, + stride=2, + padding=1, + output_padding=1, + bias_attr=use_bias), norm_layer(int(ngf * mult / 2)), nn.ReLU() ]) @@ -119,13 +117,12 @@ def __init__(self, in_c, out_c, padding_type, norm_layer, use_dropout, self.padding_type) self.conv_block.extend([ - SeparableConv2D(num_channels=in_c, - num_filters=out_c, - filter_size=3, - padding=p, - stride=1), - norm_layer(out_c), - nn.ReLU() + SeparableConv2D( + num_channels=in_c, + num_filters=out_c, + filter_size=3, + padding=p, + stride=1), norm_layer(out_c), nn.ReLU() ]) self.conv_block.extend([nn.Dropout(0.5)]) @@ -141,12 +138,12 @@ def __init__(self, in_c, out_c, padding_type, norm_layer, use_dropout, self.padding_type) self.conv_block.extend([ - SeparableConv2D(num_channels=out_c, - num_filters=in_c, - filter_size=3, - padding=p, - stride=1), - norm_layer(in_c) + SeparableConv2D( + num_channels=out_c, + num_filters=in_c, + filter_size=3, + padding=p, + stride=1), norm_layer(in_c) ]) def forward(self, inputs): @@ -178,8 +175,8 @@ def __init__(self, stride=stride, padding=padding, groups=num_channels, - weight_attr=paddle.ParamAttr( - initializer=nn.initializer.Normal(loc=0.0, scale=stddev)), + weight_attr=paddle.ParamAttr(initializer=nn.initializer.Normal( + loc=0.0, scale=stddev)), bias_attr=use_bias) ]) @@ -191,8 +188,8 @@ def __init__(self, out_channels=num_filters, kernel_size=1, stride=1, - 
weight_attr=paddle.ParamAttr( - initializer=nn.initializer.Normal(loc=0.0, scale=stddev)), + weight_attr=paddle.ParamAttr(initializer=nn.initializer.Normal( + loc=0.0, scale=stddev)), bias_attr=use_bias) ]) diff --git a/paddlers/models/ppgan/models/generators/mpr.py b/paddlers/models/ppgan/models/generators/mpr.py index 9be802a1..aed93cb4 100644 --- a/paddlers/models/ppgan/models/generators/mpr.py +++ b/paddlers/models/ppgan/models/generators/mpr.py @@ -14,12 +14,13 @@ def conv(in_channels, out_channels, kernel_size, bias_attr=False, stride=1): - return nn.Conv2D(in_channels, - out_channels, - kernel_size, - padding=(kernel_size // 2), - bias_attr=bias_attr, - stride=stride) + return nn.Conv2D( + in_channels, + out_channels, + kernel_size, + padding=(kernel_size // 2), + bias_attr=bias_attr, + stride=stride) ## Channel Attention Layer @@ -30,16 +31,20 @@ def __init__(self, channel, reduction=16, bias_attr=False): self.avg_pool = nn.AdaptiveAvgPool2D(1) # feature channel downscale and upscale --> channel weight self.conv_du = nn.Sequential( - nn.Conv2D(channel, - channel // reduction, - 1, - padding=0, - bias_attr=bias_attr), nn.ReLU(), - nn.Conv2D(channel // reduction, - channel, - 1, - padding=0, - bias_attr=bias_attr), nn.Sigmoid()) + nn.Conv2D( + channel, + channel // reduction, + 1, + padding=0, + bias_attr=bias_attr), + nn.ReLU(), + nn.Conv2D( + channel // reduction, + channel, + 1, + padding=0, + bias_attr=bias_attr), + nn.Sigmoid()) def forward(self, x): y = self.avg_pool(x) @@ -53,10 +58,12 @@ def __init__(self, n_feat, kernel_size, reduction, bias_attr, act): super(CAB, self).__init__() modules_body = [] modules_body.append( - conv(n_feat, n_feat, kernel_size, bias_attr=bias_attr)) + conv( + n_feat, n_feat, kernel_size, bias_attr=bias_attr)) modules_body.append(act) modules_body.append( - conv(n_feat, n_feat, kernel_size, bias_attr=bias_attr)) + conv( + n_feat, n_feat, kernel_size, bias_attr=bias_attr)) self.CA = CALayer(n_feat, reduction, bias_attr=bias_attr) self.body = nn.Sequential(*modules_body) @@ -73,13 +80,15 @@ class DownSample(nn.Layer): def __init__(self, in_channels, s_factor): super(DownSample, self).__init__() self.down = nn.Sequential( - nn.Upsample(scale_factor=0.5, mode='bilinear', align_corners=False), - nn.Conv2D(in_channels, - in_channels + s_factor, - 1, - stride=1, - padding=0, - bias_attr=False)) + nn.Upsample( + scale_factor=0.5, mode='bilinear', align_corners=False), + nn.Conv2D( + in_channels, + in_channels + s_factor, + 1, + stride=1, + padding=0, + bias_attr=False)) def forward(self, x): x = self.down(x) @@ -90,13 +99,15 @@ class UpSample(nn.Layer): def __init__(self, in_channels, s_factor): super(UpSample, self).__init__() self.up = nn.Sequential( - nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False), - nn.Conv2D(in_channels + s_factor, - in_channels, - 1, - stride=1, - padding=0, - bias_attr=False)) + nn.Upsample( + scale_factor=2, mode='bilinear', align_corners=False), + nn.Conv2D( + in_channels + s_factor, + in_channels, + 1, + stride=1, + padding=0, + bias_attr=False)) def forward(self, x): x = self.up(x) @@ -107,13 +118,15 @@ class SkipUpSample(nn.Layer): def __init__(self, in_channels, s_factor): super(SkipUpSample, self).__init__() self.up = nn.Sequential( - nn.Upsample(scale_factor=2, mode='bilinear', align_corners=False), - nn.Conv2D(in_channels + s_factor, - in_channels, - 1, - stride=1, - padding=0, - bias_attr=False)) + nn.Upsample( + scale_factor=2, mode='bilinear', align_corners=False), + nn.Conv2D( + in_channels + s_factor, 
+ in_channels, + 1, + stride=1, + padding=0, + bias_attr=False)) def forward(self, x, y): x = self.up(x) @@ -156,31 +169,31 @@ def __init__(self, n_feat, kernel_size, reduction, act, bias_attr, # Cross Stage Feature Fusion (CSFF) if csff: - self.csff_enc1 = nn.Conv2D(n_feat, - n_feat, - kernel_size=1, - bias_attr=bias_attr) - self.csff_enc2 = nn.Conv2D(n_feat + scale_unetfeats, - n_feat + scale_unetfeats, - kernel_size=1, - bias_attr=bias_attr) - self.csff_enc3 = nn.Conv2D(n_feat + (scale_unetfeats * 2), - n_feat + (scale_unetfeats * 2), - kernel_size=1, - bias_attr=bias_attr) - - self.csff_dec1 = nn.Conv2D(n_feat, - n_feat, - kernel_size=1, - bias_attr=bias_attr) - self.csff_dec2 = nn.Conv2D(n_feat + scale_unetfeats, - n_feat + scale_unetfeats, - kernel_size=1, - bias_attr=bias_attr) - self.csff_dec3 = nn.Conv2D(n_feat + (scale_unetfeats * 2), - n_feat + (scale_unetfeats * 2), - kernel_size=1, - bias_attr=bias_attr) + self.csff_enc1 = nn.Conv2D( + n_feat, n_feat, kernel_size=1, bias_attr=bias_attr) + self.csff_enc2 = nn.Conv2D( + n_feat + scale_unetfeats, + n_feat + scale_unetfeats, + kernel_size=1, + bias_attr=bias_attr) + self.csff_enc3 = nn.Conv2D( + n_feat + (scale_unetfeats * 2), + n_feat + (scale_unetfeats * 2), + kernel_size=1, + bias_attr=bias_attr) + + self.csff_dec1 = nn.Conv2D( + n_feat, n_feat, kernel_size=1, bias_attr=bias_attr) + self.csff_dec2 = nn.Conv2D( + n_feat + scale_unetfeats, + n_feat + scale_unetfeats, + kernel_size=1, + bias_attr=bias_attr) + self.csff_dec3 = nn.Conv2D( + n_feat + (scale_unetfeats * 2), + n_feat + (scale_unetfeats * 2), + kernel_size=1, + bias_attr=bias_attr) def forward(self, x, encoder_outs=None, decoder_outs=None): enc1 = self.encoder_level1(x) @@ -300,36 +313,42 @@ def __init__(self, n_feat, scale_orsnetfeats, kernel_size, reduction, act, UpSample(n_feat + scale_unetfeats, scale_unetfeats), UpSample(n_feat, scale_unetfeats)) - self.conv_enc1 = nn.Conv2D(n_feat, - n_feat + scale_orsnetfeats, - kernel_size=1, - bias_attr=bias_attr) - self.conv_enc2 = nn.Conv2D(n_feat, - n_feat + scale_orsnetfeats, - kernel_size=1, - bias_attr=bias_attr) - self.conv_enc3 = nn.Conv2D(n_feat, - n_feat + scale_orsnetfeats, - kernel_size=1, - bias_attr=bias_attr) - - self.conv_dec1 = nn.Conv2D(n_feat, - n_feat + scale_orsnetfeats, - kernel_size=1, - bias_attr=bias_attr) - self.conv_dec2 = nn.Conv2D(n_feat, - n_feat + scale_orsnetfeats, - kernel_size=1, - bias_attr=bias_attr) - self.conv_dec3 = nn.Conv2D(n_feat, - n_feat + scale_orsnetfeats, - kernel_size=1, - bias_attr=bias_attr) + self.conv_enc1 = nn.Conv2D( + n_feat, + n_feat + scale_orsnetfeats, + kernel_size=1, + bias_attr=bias_attr) + self.conv_enc2 = nn.Conv2D( + n_feat, + n_feat + scale_orsnetfeats, + kernel_size=1, + bias_attr=bias_attr) + self.conv_enc3 = nn.Conv2D( + n_feat, + n_feat + scale_orsnetfeats, + kernel_size=1, + bias_attr=bias_attr) + + self.conv_dec1 = nn.Conv2D( + n_feat, + n_feat + scale_orsnetfeats, + kernel_size=1, + bias_attr=bias_attr) + self.conv_dec2 = nn.Conv2D( + n_feat, + n_feat + scale_orsnetfeats, + kernel_size=1, + bias_attr=bias_attr) + self.conv_dec3 = nn.Conv2D( + n_feat, + n_feat + scale_orsnetfeats, + kernel_size=1, + bias_attr=bias_attr) def forward(self, x, encoder_outs, decoder_outs): x = self.orb1(x) - x = x + self.conv_enc1(encoder_outs[0]) + self.conv_dec1( - decoder_outs[0]) + x = x + self.conv_enc1(encoder_outs[0]) + self.conv_dec1(decoder_outs[ + 0]) x = self.orb2(x) x = x + self.conv_enc2(self.up_enc1(encoder_outs[1])) + self.conv_dec2( @@ -374,33 +393,38 @@ def 
__init__(self, super(MPRNet, self).__init__() act = nn.PReLU() self.shallow_feat1 = nn.Sequential( - conv(in_c, n_feat, kernel_size, bias_attr=bias_attr), + conv( + in_c, n_feat, kernel_size, bias_attr=bias_attr), CAB(n_feat, kernel_size, reduction, bias_attr=bias_attr, act=act)) self.shallow_feat2 = nn.Sequential( - conv(in_c, n_feat, kernel_size, bias_attr=bias_attr), + conv( + in_c, n_feat, kernel_size, bias_attr=bias_attr), CAB(n_feat, kernel_size, reduction, bias_attr=bias_attr, act=act)) self.shallow_feat3 = nn.Sequential( - conv(in_c, n_feat, kernel_size, bias_attr=bias_attr), + conv( + in_c, n_feat, kernel_size, bias_attr=bias_attr), CAB(n_feat, kernel_size, reduction, bias_attr=bias_attr, act=act)) # Cross Stage Feature Fusion (CSFF) - self.stage1_encoder = Encoder(n_feat, - kernel_size, - reduction, - act, - bias_attr, - scale_unetfeats, - csff=False) + self.stage1_encoder = Encoder( + n_feat, + kernel_size, + reduction, + act, + bias_attr, + scale_unetfeats, + csff=False) self.stage1_decoder = Decoder(n_feat, kernel_size, reduction, act, bias_attr, scale_unetfeats) - self.stage2_encoder = Encoder(n_feat, - kernel_size, - reduction, - act, - bias_attr, - scale_unetfeats, - csff=True) + self.stage2_encoder = Encoder( + n_feat, + kernel_size, + reduction, + act, + bias_attr, + scale_unetfeats, + csff=True) self.stage2_decoder = Decoder(n_feat, kernel_size, reduction, act, bias_attr, scale_unetfeats) @@ -411,18 +435,15 @@ def __init__(self, self.sam12 = SAM(n_feat, kernel_size=1, bias_attr=bias_attr) self.sam23 = SAM(n_feat, kernel_size=1, bias_attr=bias_attr) - self.concat12 = conv(n_feat * 2, - n_feat, - kernel_size, - bias_attr=bias_attr) - self.concat23 = conv(n_feat * 2, - n_feat + scale_orsnetfeats, - kernel_size, - bias_attr=bias_attr) - self.tail = conv(n_feat + scale_orsnetfeats, - out_c, - kernel_size, - bias_attr=bias_attr) + self.concat12 = conv( + n_feat * 2, n_feat, kernel_size, bias_attr=bias_attr) + self.concat23 = conv( + n_feat * 2, + n_feat + scale_orsnetfeats, + kernel_size, + bias_attr=bias_attr) + self.tail = conv( + n_feat + scale_orsnetfeats, out_c, kernel_size, bias_attr=bias_attr) def forward(self, x3_img): # Original-resolution Image for Stage 3 diff --git a/paddlers/models/ppgan/models/generators/msvsr.py b/paddlers/models/ppgan/models/generators/msvsr.py index 79e841cf..512c55d3 100644 --- a/paddlers/models/ppgan/models/generators/msvsr.py +++ b/paddlers/models/ppgan/models/generators/msvsr.py @@ -59,6 +59,7 @@ class MSVSR(nn.Layer): use_local_connnect (bool): Whether add feature of stage1 after upsample. Default: True. 
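        Example (illustrative): an input lqs of shape (n, t, 3, h, w), with
        spatial size at least 64 as asserted in forward, yields a 4x output
        of shape (n, t, 3, 4*h, 4*w); with auxiliary_loss enabled, forward
        additionally returns the stacked stage-1 auxiliary frames used for
        training supervision.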
""" + def __init__(self, mid_channels=32, num_init_blocks=2, @@ -129,34 +130,30 @@ def __init__(self, (3 + i) * mid_channels, mid_channels, num_blocks) # stage1 - self.stage1_align = AlignmentModule(mid_channels, - mid_channels, - 3, - padding=1, - deformable_groups=stage1_groups) - self.stage1_blocks = ResidualBlocksWithInputConv( - 3 * mid_channels, mid_channels, 3) + self.stage1_align = AlignmentModule( + mid_channels, + mid_channels, + 3, + padding=1, + deformable_groups=stage1_groups) + self.stage1_blocks = ResidualBlocksWithInputConv(3 * mid_channels, + mid_channels, 3) # upsampling module self.reconstruction = ResidualBlocksWithInputConv( 6 * mid_channels, mid_channels, num_reconstruction_blocks) - self.upsample1 = PixelShufflePack(mid_channels, - mid_channels, - 2, - upsample_kernel=3) - self.upsample2 = PixelShufflePack(mid_channels, - mid_channels, - 2, - upsample_kernel=3) + self.upsample1 = PixelShufflePack( + mid_channels, mid_channels, 2, upsample_kernel=3) + self.upsample2 = PixelShufflePack( + mid_channels, mid_channels, 2, upsample_kernel=3) if self.only_last: self.conv_last = nn.Conv2D(mid_channels, 3, 3, 1, 1) else: self.conv_hr = nn.Conv2D(mid_channels, mid_channels, 3, 1, 1) self.conv_last = nn.Conv2D(mid_channels, 3, 3, 1, 1) - self.img_upsample = nn.Upsample(scale_factor=4, - mode='bilinear', - align_corners=False) + self.img_upsample = nn.Upsample( + scale_factor=4, mode='bilinear', align_corners=False) # activation function self.lrelu = nn.LeakyReLU(negative_slope=0.1) @@ -179,14 +176,10 @@ def __init__(self, self.aux_conv_last = nn.Conv2D(mid_channels, 3, 3, 1, 1) - self.aux_upsample1 = PixelShufflePack(mid_channels, - mid_channels, - 2, - upsample_kernel=3) - self.aux_upsample2 = PixelShufflePack(mid_channels, - mid_channels, - 2, - upsample_kernel=3) + self.aux_upsample1 = PixelShufflePack( + mid_channels, mid_channels, 2, upsample_kernel=3) + self.aux_upsample2 = PixelShufflePack( + mid_channels, mid_channels, 2, upsample_kernel=3) self.hybrid_conv_last = nn.Conv2D(mid_channels, 3, 3, 1, 1) def check_if_mirror_extended(self, lrs): @@ -267,8 +260,8 @@ def stage1(self, feats, flows, flows_forward=None): if i < t: feat_back = feats['spatial'][mapping_idx[idx - 1]] flow_n1_ = flows_forward[:, flow_idx[i] - 1, :, :, :] - cond_n1_ = flow_warp(feat_back, flow_n1_.transpose([0, 2, 3, - 1])) + cond_n1_ = flow_warp(feat_back, + flow_n1_.transpose([0, 2, 3, 1])) cond_ = paddle.concat([cond_n1_, feat_current], axis=1) feat_back, _, _ = self.stage1_align(feat_back, cond_, flow_n1_) else: @@ -339,8 +332,8 @@ def stage2(self, feats, flows): # concatenate and residual blocks feat = [feat_current] + [ - feats[k][idx] - for k in feats if k not in ['spatial', prop_name] + feats[k][idx] for k in feats + if k not in ['spatial', prop_name] ] + [feat_prop] feat = paddle.concat(feat, axis=1) @@ -412,8 +405,8 @@ def stage3(self, # concatenate and residual blocks feat = [feat_current] + [ - feats[k][idx] - for k in feats if k not in ['spatial', prop_name] + feats[k][idx] for k in feats + if k not in ['spatial', prop_name] ] + [feat_prop] feat = paddle.concat(feat, axis=1) @@ -460,12 +453,15 @@ def auxiliary_stage(self, feats, lqs): # output tensor of auxiliary_stage with shape (n, 3, 4*h, 4*w) aux_feats['outs'].append(hr) - aux_feat = self.aux_block_down1(paddle.concat([hr, hr_high], - axis=1)) + aux_feat = self.aux_block_down1( + paddle.concat( + [hr, hr_high], axis=1)) aux_feat = self.aux_block_down2( - paddle.concat([aux_feat, hr_mid], axis=1)) - aux_feat = 
self.aux_fusion(paddle.concat([aux_feat, hr_low], - axis=1)) + paddle.concat( + [aux_feat, hr_mid], axis=1)) + aux_feat = self.aux_fusion( + paddle.concat( + [aux_feat, hr_low], axis=1)) # out feature of auxiliary_stage with shape (n, c, h, w) aux_feats['feats'].append(aux_feat) @@ -520,8 +516,9 @@ def upsample(self, lqs, feats, aux_feats=None): outputs.append(hr) if self.auxiliary_loss: - return paddle.stack(aux_feats['outs'], - axis=1), paddle.stack(outputs, axis=1) + return paddle.stack( + aux_feats['outs'], axis=1), paddle.stack( + outputs, axis=1) return paddle.stack(outputs, axis=1) def forward(self, lqs): @@ -547,9 +544,10 @@ def forward(self, lqs): feats['spatial'] = [feats_[:, i, :, :, :] for i in range(0, t)] # compute optical flow using the low-res inputs - assert lqs_downsample.shape[3] >= 64 and lqs_downsample.shape[4] >= 64, ( - 'The height and width of low-res inputs must be at least 64, ' - f'but got {h} and {w}.') + assert lqs_downsample.shape[3] >= 64 and lqs_downsample.shape[ + 4] >= 64, ( + 'The height and width of low-res inputs must be at least 64, ' + f'but got {h} and {w}.') flows_forward, flows_backward = self.compute_flow(lqs_downsample) @@ -581,6 +579,7 @@ class AlignmentModule(nn.Layer): groups (int): Same as nn.Conv2d. deformable_groups (int): Number of deformable_groups in DeformConv2D. """ + def __init__(self, in_channels=128, out_channels=64, @@ -599,15 +598,15 @@ def __init__(self, nn.LeakyReLU(negative_slope=0.1), nn.Conv2D(out_channels, out_channels, 3, 1, 1), nn.LeakyReLU(negative_slope=0.1), - nn.Conv2D(out_channels, 27 * deformable_groups, 3, 1, 1), - ) - self.dcn = DeformConv2D(in_channels, - out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - deformable_groups=deformable_groups) + nn.Conv2D(out_channels, 27 * deformable_groups, 3, 1, 1), ) + self.dcn = DeformConv2D( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + deformable_groups=deformable_groups) self.init_offset() @@ -642,6 +641,7 @@ class ReAlignmentModule(nn.Layer): groups (int): Same as nn.Conv2d. deformable_groups (int): Number of deformable_groups in DeformConv2D. 
""" + def __init__(self, in_channels=128, out_channels=64, @@ -653,13 +653,14 @@ def __init__(self, deformable_groups=16): super(ReAlignmentModule, self).__init__() - self.mdconv = DeformConv2D(in_channels, - out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - deformable_groups=deformable_groups) + self.mdconv = DeformConv2D( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + deformable_groups=deformable_groups) self.conv_offset = nn.Sequential( nn.Conv2D(2 * out_channels + 2, out_channels, 3, 1, 1), nn.LeakyReLU(negative_slope=0.1), @@ -667,15 +668,15 @@ def __init__(self, nn.LeakyReLU(negative_slope=0.1), nn.Conv2D(out_channels, out_channels, 3, 1, 1), nn.LeakyReLU(negative_slope=0.1), - nn.Conv2D(out_channels, 27 * deformable_groups, 3, 1, 1), - ) - self.dcn = DeformConv2D(in_channels, - out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - deformable_groups=deformable_groups) + nn.Conv2D(out_channels, 27 * deformable_groups, 3, 1, 1), ) + self.dcn = DeformConv2D( + in_channels, + out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + deformable_groups=deformable_groups) self.init_offset() @@ -734,6 +735,7 @@ class ModifiedSPyNet(nn.Layer): use_tiny_block (bool): Whether use tiny spynet. Default: True. """ + def __init__(self, act_cfg=dict(name='LeakyReLU'), num_blocks=6, @@ -741,7 +743,8 @@ def __init__(self, super().__init__() self.num_blocks = num_blocks self.basic_module = nn.LayerList([ - SPyNetBlock(act_cfg=act_cfg, use_tiny_block=use_tiny_block) + SPyNetBlock( + act_cfg=act_cfg, use_tiny_block=use_tiny_block) for _ in range(num_blocks) ]) @@ -781,8 +784,8 @@ def compute_flow(self, ref, supp): # flow computation flow = paddle.to_tensor( np.zeros([ - n, 2, h // (2**(self.num_blocks - 1)), w // - (2**(self.num_blocks - 1)) + n, 2, h // (2**(self.num_blocks - 1)), w // (2**(self.num_blocks + - 1)) ], 'float32')) for level in range(len(ref)): @@ -794,13 +797,14 @@ def compute_flow(self, ref, supp): align_corners=True) * 2.0 # add the residue to the upsampled flow - flow = flow_up + self.basic_module[level](paddle.concat([ - ref[level], - flow_warp(supp[level], - flow_up.transpose([0, 2, 3, 1]), - padding_mode='border'), flow_up - ], - axis=1)) + flow = flow_up + self.basic_module[level](paddle.concat( + [ + ref[level], flow_warp( + supp[level], + flow_up.transpose([0, 2, 3, 1]), + padding_mode='border'), flow_up + ], + axis=1)) return flow @@ -822,8 +826,8 @@ def compute_flow_list(self, ref, supp): flow_list = [] flow = paddle.to_tensor( np.zeros([ - n, 2, h // (2**(self.num_blocks - 1)), w // - (2**(self.num_blocks - 1)) + n, 2, h // (2**(self.num_blocks - 1)), w // (2**(self.num_blocks + - 1)) ], 'float32')) for level in range(len(ref)): if level == 0: @@ -834,13 +838,14 @@ def compute_flow_list(self, ref, supp): align_corners=True) * 2.0 # add the residue to the upsampled flow - flow = flow_up + self.basic_module[level](paddle.concat([ - ref[level], - flow_warp(supp[level], - flow_up.transpose([0, 2, 3, 1]), - padding_mode='border'), flow_up - ], - axis=1)) + flow = flow_up + self.basic_module[level](paddle.concat( + [ + ref[level], flow_warp( + supp[level], + flow_up.transpose([0, 2, 3, 1]), + padding_mode='border'), flow_up + ], + axis=1)) flow_list.append(flow) return flow_list @@ -861,24 +866,21 @@ def forward(self, ref, supp): h, w = ref.shape[2:4] w_up = w if (w % 32) == 0 
else 32 * (w // 32 + 1) h_up = h if (h % 32) == 0 else 32 * (h // 32 + 1) - ref = F.interpolate(ref, - size=(h_up, w_up), - mode='bilinear', - align_corners=False) + ref = F.interpolate( + ref, size=(h_up, w_up), mode='bilinear', align_corners=False) - supp = F.interpolate(supp, - size=(h_up, w_up), - mode='bilinear', - align_corners=False) + supp = F.interpolate( + supp, size=(h_up, w_up), mode='bilinear', align_corners=False) ref.stop_gradient = False supp.stop_gradient = False # compute flow, and resize back to the original resolution - flow = F.interpolate(self.compute_flow(ref, supp), - size=(h, w), - mode='bilinear', - align_corners=False) + flow = F.interpolate( + self.compute_flow(ref, supp), + size=(h, w), + mode='bilinear', + align_corners=False) # adjust the flow values flow[:, 0, :, :] *= float(w) / float(w_up) @@ -891,174 +893,202 @@ class SPyNetBlock(nn.Layer): """Basic Block of Modified SPyNet. refer to Optical Flow Estimation using a Spatial Pyramid Network, CVPR, 2017 """ + def __init__(self, act_cfg=dict(name='LeakyReLU'), use_tiny_block=False): super().__init__() if use_tiny_block: self.basic_module = nn.Sequential( - ConvLayer(in_channels=8, - out_channels=16, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=16, - out_channels=16, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=16, - out_channels=32, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=32, - out_channels=32, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=32, - out_channels=32, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=32, - out_channels=32, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=32, - out_channels=16, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=16, - out_channels=16, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=16, - out_channels=16, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=16, - out_channels=8, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=8, - out_channels=8, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=8, - out_channels=2, - kernel_size=3, - stride=1, - padding=1, - act_cfg=None)) + ConvLayer( + in_channels=8, + out_channels=16, + kernel_size=3, + stride=1, + padding=1, + act_cfg=act_cfg), + ConvLayer( + in_channels=16, + out_channels=16, + kernel_size=3, + stride=1, + padding=1, + act_cfg=act_cfg), + ConvLayer( + in_channels=16, + out_channels=32, + kernel_size=3, + stride=1, + padding=1, + act_cfg=act_cfg), + ConvLayer( + in_channels=32, + out_channels=32, + kernel_size=3, + stride=1, + padding=1, + act_cfg=act_cfg), + ConvLayer( + in_channels=32, + out_channels=32, + kernel_size=3, + stride=1, + padding=1, + act_cfg=act_cfg), + ConvLayer( + in_channels=32, + out_channels=32, + kernel_size=3, + stride=1, + padding=1, + act_cfg=act_cfg), + ConvLayer( + in_channels=32, + out_channels=16, + kernel_size=3, + stride=1, + padding=1, + act_cfg=act_cfg), + ConvLayer( + in_channels=16, + out_channels=16, + kernel_size=3, + stride=1, + padding=1, + act_cfg=act_cfg), + ConvLayer( + in_channels=16, + out_channels=16, + kernel_size=3, + stride=1, + padding=1, + act_cfg=act_cfg), + ConvLayer( + in_channels=16, + out_channels=8, + kernel_size=3, + stride=1, + padding=1, + 
act_cfg=act_cfg), + ConvLayer( + in_channels=8, + out_channels=8, + kernel_size=3, + stride=1, + padding=1, + act_cfg=act_cfg), + ConvLayer( + in_channels=8, + out_channels=2, + kernel_size=3, + stride=1, + padding=1, + act_cfg=None)) else: self.basic_module = nn.Sequential( - ConvLayer(in_channels=8, - out_channels=16, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=16, - out_channels=16, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=16, - out_channels=32, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=32, - out_channels=32, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=32, - out_channels=32, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=32, - out_channels=64, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=64, - out_channels=32, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=32, - out_channels=32, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=32, - out_channels=32, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=32, - out_channels=16, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=16, - out_channels=16, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=16, - out_channels=16, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=16, - out_channels=16, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=16, - out_channels=16, - kernel_size=3, - stride=1, - padding=1, - act_cfg=act_cfg), - ConvLayer(in_channels=16, - out_channels=2, - kernel_size=3, - stride=1, - padding=1, - act_cfg=None)) + ConvLayer( + in_channels=8, + out_channels=16, + kernel_size=3, + stride=1, + padding=1, + act_cfg=act_cfg), + ConvLayer( + in_channels=16, + out_channels=16, + kernel_size=3, + stride=1, + padding=1, + act_cfg=act_cfg), + ConvLayer( + in_channels=16, + out_channels=32, + kernel_size=3, + stride=1, + padding=1, + act_cfg=act_cfg), + ConvLayer( + in_channels=32, + out_channels=32, + kernel_size=3, + stride=1, + padding=1, + act_cfg=act_cfg), + ConvLayer( + in_channels=32, + out_channels=32, + kernel_size=3, + stride=1, + padding=1, + act_cfg=act_cfg), + ConvLayer( + in_channels=32, + out_channels=64, + kernel_size=3, + stride=1, + padding=1, + act_cfg=act_cfg), + ConvLayer( + in_channels=64, + out_channels=32, + kernel_size=3, + stride=1, + padding=1, + act_cfg=act_cfg), + ConvLayer( + in_channels=32, + out_channels=32, + kernel_size=3, + stride=1, + padding=1, + act_cfg=act_cfg), + ConvLayer( + in_channels=32, + out_channels=32, + kernel_size=3, + stride=1, + padding=1, + act_cfg=act_cfg), + ConvLayer( + in_channels=32, + out_channels=16, + kernel_size=3, + stride=1, + padding=1, + act_cfg=act_cfg), + ConvLayer( + in_channels=16, + out_channels=16, + kernel_size=3, + stride=1, + padding=1, + act_cfg=act_cfg), + ConvLayer( + in_channels=16, + out_channels=16, + kernel_size=3, + stride=1, + padding=1, + act_cfg=act_cfg), + ConvLayer( + in_channels=16, + out_channels=16, + kernel_size=3, + stride=1, + padding=1, + act_cfg=act_cfg), + ConvLayer( + in_channels=16, + out_channels=16, + kernel_size=3, + stride=1, + padding=1, + act_cfg=act_cfg), + ConvLayer( + in_channels=16, + out_channels=2, + kernel_size=3, + stride=1, + 
padding=1, + act_cfg=None)) def forward(self, tensor_input): """Forward function of SPyNetBlock. @@ -1087,13 +1117,14 @@ def __init__(self, self.act_cfg = act_cfg self.with_activation = act_cfg is not None - self.conv = nn.Conv2D(in_channels=in_channels, - out_channels=out_channels, - kernel_size=kernel_size, - stride=stride, - padding=padding, - dilation=dilation, - groups=groups) + self.conv = nn.Conv2D( + in_channels=in_channels, + out_channels=out_channels, + kernel_size=kernel_size, + stride=stride, + padding=padding, + dilation=dilation, + groups=groups) if self.with_activation: if act_cfg['name'] == 'ReLU': diff --git a/paddlers/models/ppgan/models/generators/occlusion_aware.py b/paddlers/models/ppgan/models/generators/occlusion_aware.py index 7558caca..6e31102b 100644 --- a/paddlers/models/ppgan/models/generators/occlusion_aware.py +++ b/paddlers/models/ppgan/models/generators/occlusion_aware.py @@ -17,6 +17,7 @@ class OcclusionAwareGenerator(nn.Layer): Generator that given source image and and keypoints try to transform image according to movement trajectories induced by keypoints. Generator follows Johnson architecture. """ + def __init__(self, num_channels, num_kp, @@ -43,28 +44,31 @@ def __init__(self, if mobile_net: self.first = nn.Sequential( - SameBlock2d(num_channels, - num_channels, - kernel_size=3, - padding=1, - mobile_net=mobile_net), - SameBlock2d(num_channels, - num_channels, - kernel_size=3, - padding=1, - mobile_net=mobile_net), - SameBlock2d(num_channels, - block_expansion, - kernel_size=3, - padding=1, - mobile_net=mobile_net) - ) + SameBlock2d( + num_channels, + num_channels, + kernel_size=3, + padding=1, + mobile_net=mobile_net), + SameBlock2d( + num_channels, + num_channels, + kernel_size=3, + padding=1, + mobile_net=mobile_net), + SameBlock2d( + num_channels, + block_expansion, + kernel_size=3, + padding=1, + mobile_net=mobile_net)) else: - self.first = SameBlock2d(num_channels, - block_expansion, - kernel_size=(7, 7), - padding=(3, 3), - mobile_net=mobile_net) + self.first = SameBlock2d( + num_channels, + block_expansion, + kernel_size=(7, 7), + padding=(3, 3), + mobile_net=mobile_net) down_blocks = [] if mobile_net: @@ -72,19 +76,21 @@ def __init__(self, in_features = min(max_features, block_expansion * (2**i)) out_features = min(max_features, block_expansion * (2**(i + 1))) down_blocks.append( - MobileDownBlock2d(in_features, - out_features, - kernel_size=(3, 3), - padding=(1, 1))) + MobileDownBlock2d( + in_features, + out_features, + kernel_size=(3, 3), + padding=(1, 1))) else: for i in range(num_down_blocks): in_features = min(max_features, block_expansion * (2**i)) out_features = min(max_features, block_expansion * (2**(i + 1))) down_blocks.append( - DownBlock2d(in_features, - out_features, - kernel_size=(3, 3), - padding=(1, 1))) + DownBlock2d( + in_features, + out_features, + kernel_size=(3, 3), + padding=(1, 1))) self.down_blocks = nn.LayerList(down_blocks) up_blocks = [] @@ -92,26 +98,26 @@ def __init__(self, for i in range(num_down_blocks): in_features = min(max_features, block_expansion * (2**(num_down_blocks - i))) - out_features = min( - max_features, - block_expansion * (2**(num_down_blocks - i - 1))) + out_features = min(max_features, block_expansion * + (2**(num_down_blocks - i - 1))) up_blocks.append( - MobileUpBlock2d(in_features, - out_features, - kernel_size=(3, 3), - padding=(1, 1))) + MobileUpBlock2d( + in_features, + out_features, + kernel_size=(3, 3), + padding=(1, 1))) else: for i in range(num_down_blocks): in_features = 
min(max_features, block_expansion * (2**(num_down_blocks - i))) - out_features = min( - max_features, - block_expansion * (2**(num_down_blocks - i - 1))) + out_features = min(max_features, block_expansion * + (2**(num_down_blocks - i - 1))) up_blocks.append( - UpBlock2d(in_features, - out_features, - kernel_size=(3, 3), - padding=(1, 1))) + UpBlock2d( + in_features, + out_features, + kernel_size=(3, 3), + padding=(1, 1))) self.up_blocks = nn.LayerList(up_blocks) self.bottleneck = paddle.nn.Sequential() @@ -120,39 +126,42 @@ def __init__(self, for i in range(num_bottleneck_blocks): self.bottleneck.add_sublayer( 'r' + str(i), - MobileResBlock2d(in_features, - kernel_size=(3, 3), - padding=(1, 1))) + MobileResBlock2d( + in_features, kernel_size=(3, 3), padding=(1, 1))) else: for i in range(num_bottleneck_blocks): self.bottleneck.add_sublayer( 'r' + str(i), - ResBlock2d(in_features, kernel_size=(3, 3), padding=(1, 1))) + ResBlock2d( + in_features, kernel_size=(3, 3), padding=(1, 1))) if mobile_net: self.final = nn.Sequential( - nn.Conv2D(block_expansion, - block_expansion, - kernel_size=3, - weight_attr=nn.initializer.KaimingUniform(), - padding=1), - nn.ReLU(), - nn.Conv2D(block_expansion, - block_expansion, - kernel_size=3, - weight_attr=nn.initializer.KaimingUniform(), - padding=1), - nn.ReLU(), - nn.Conv2D(block_expansion, - num_channels, - kernel_size=3, - weight_attr=nn.initializer.KaimingUniform(), - padding=1) - ) + nn.Conv2D( + block_expansion, + block_expansion, + kernel_size=3, + weight_attr=nn.initializer.KaimingUniform(), + padding=1), + nn.ReLU(), + nn.Conv2D( + block_expansion, + block_expansion, + kernel_size=3, + weight_attr=nn.initializer.KaimingUniform(), + padding=1), + nn.ReLU(), + nn.Conv2D( + block_expansion, + num_channels, + kernel_size=3, + weight_attr=nn.initializer.KaimingUniform(), + padding=1)) else: - self.final = nn.Conv2D(block_expansion, - num_channels, - kernel_size=(7, 7), - padding=(3, 3)) + self.final = nn.Conv2D( + block_expansion, + num_channels, + kernel_size=(7, 7), + padding=(3, 3)) self.estimate_occlusion_map = estimate_occlusion_map self.num_channels = num_channels self.inference = inference @@ -164,30 +173,30 @@ def deform_input(self, inp, deformation): _, _, h, w = inp.shape if h_old != h or w_old != w: deformation = deformation.transpose([0, 3, 1, 2]) - deformation = F.interpolate(deformation, - size=(h, w), - mode='bilinear', - align_corners=False) + deformation = F.interpolate( + deformation, size=(h, w), mode='bilinear', align_corners=False) deformation = deformation.transpose([0, 2, 3, 1]) if self.inference: identity_grid = make_coordinate_grid((h, w), type=inp.dtype) identity_grid = identity_grid.reshape([1, h, w, 2]) visualization_matrix = np.zeros((h, w)).astype("float32") - visualization_matrix[self.pad:h - self.pad, - self.pad:w - self.pad] = 1.0 + visualization_matrix[self.pad:h - self.pad, self.pad:w - + self.pad] = 1.0 gauss_kernel = paddle.to_tensor( - cv2.GaussianBlur(visualization_matrix, (9, 9), - 0.0, - borderType=cv2.BORDER_ISOLATED)) + cv2.GaussianBlur( + visualization_matrix, (9, 9), + 0.0, + borderType=cv2.BORDER_ISOLATED)) gauss_kernel = gauss_kernel.unsqueeze(0).unsqueeze(-1) - deformation = gauss_kernel * deformation + ( - 1 - gauss_kernel) * identity_grid + deformation = gauss_kernel * deformation + (1 - gauss_kernel + ) * identity_grid - return F.grid_sample(inp, - deformation, - mode='bilinear', - padding_mode='zeros', - align_corners=True) + return F.grid_sample( + inp, + deformation, + mode='bilinear', + 
padding_mode='zeros', + align_corners=True) def forward(self, source_image, kp_driving, kp_source): # Encoding (downsampling) part @@ -198,9 +207,10 @@ def forward(self, source_image, kp_driving, kp_source): # Transforming feature representation according to deformation and occlusion output_dict = {} if self.dense_motion_network is not None: - dense_motion = self.dense_motion_network(source_image=source_image, - kp_driving=kp_driving, - kp_source=kp_source) + dense_motion = self.dense_motion_network( + source_image=source_image, + kp_driving=kp_driving, + kp_source=kp_source) output_dict['mask'] = dense_motion['mask'] output_dict['sparse_deformed'] = dense_motion['sparse_deformed'] @@ -215,10 +225,11 @@ def forward(self, source_image, kp_driving, kp_source): if occlusion_map is not None: if out.shape[2] != occlusion_map.shape[2] or out.shape[ 3] != occlusion_map.shape[3]: - occlusion_map = F.interpolate(occlusion_map, - size=out.shape[2:], - mode='bilinear', - align_corners=False) + occlusion_map = F.interpolate( + occlusion_map, + size=out.shape[2:], + mode='bilinear', + align_corners=False) if self.inference and not self.mobile_net: h, w = occlusion_map.shape[2:] occlusion_map[:, :, 0:self.pad, :] = 1.0 diff --git a/paddlers/models/ppgan/models/generators/pan.py b/paddlers/models/ppgan/models/generators/pan.py index 35624274..43bd7c37 100644 --- a/paddlers/models/ppgan/models/generators/pan.py +++ b/paddlers/models/ppgan/models/generators/pan.py @@ -41,6 +41,7 @@ def make_multi_blocks(func, num_layers): class PA(nn.Layer): '''PA is pixel attention''' + def __init__(self, nf): super(PA, self).__init__() @@ -62,16 +63,18 @@ def __init__(self, nf, k_size=3): super(PAConv, self).__init__() self.k2 = nn.Conv2D(nf, nf, 1) # 1x1 convolution nf->nf self.sigmoid = nn.Sigmoid() - self.k3 = nn.Conv2D(nf, - nf, - kernel_size=k_size, - padding=(k_size - 1) // 2, - bias_attr=False) # 3x3 convolution - self.k4 = nn.Conv2D(nf, - nf, - kernel_size=k_size, - padding=(k_size - 1) // 2, - bias_attr=False) # 3x3 convolution + self.k3 = nn.Conv2D( + nf, + nf, + kernel_size=k_size, + padding=(k_size - 1) // 2, + bias_attr=False) # 3x3 convolution + self.k4 = nn.Conv2D( + nf, + nf, + kernel_size=k_size, + padding=(k_size - 1) // 2, + bias_attr=False) # 3x3 convolution def forward(self, x): @@ -88,34 +91,30 @@ class SCPA(nn.Layer): """ SCPA is modified from SCNet (Jiang-Jiang Liu et al. Improving Convolutional Networks with Self-Calibrated Convolutions. 
In CVPR, 2020) """ + def __init__(self, nf, reduction=2, stride=1, dilation=1): super(SCPA, self).__init__() group_width = nf // reduction - self.conv1_a = nn.Conv2D(nf, - group_width, - kernel_size=1, - bias_attr=False) - self.conv1_b = nn.Conv2D(nf, - group_width, - kernel_size=1, - bias_attr=False) + self.conv1_a = nn.Conv2D( + nf, group_width, kernel_size=1, bias_attr=False) + self.conv1_b = nn.Conv2D( + nf, group_width, kernel_size=1, bias_attr=False) self.k1 = nn.Sequential( - nn.Conv2D(group_width, - group_width, - kernel_size=3, - stride=stride, - padding=dilation, - dilation=dilation, - bias_attr=False)) + nn.Conv2D( + group_width, + group_width, + kernel_size=3, + stride=stride, + padding=dilation, + dilation=dilation, + bias_attr=False)) self.PAConv = PAConv(group_width) - self.conv3 = nn.Conv2D(group_width * reduction, - nf, - kernel_size=1, - bias_attr=False) + self.conv3 = nn.Conv2D( + group_width * reduction, nf, kernel_size=1, bias_attr=False) self.lrelu = nn.LeakyReLU(negative_slope=0.2) @@ -174,24 +173,25 @@ def forward(self, x): if self.scale == 2 or self.scale == 3: fea = self.upconv1( - F.interpolate(fea, scale_factor=self.scale, mode='nearest')) + F.interpolate( + fea, scale_factor=self.scale, mode='nearest')) fea = self.lrelu(self.att1(fea)) fea = self.lrelu(self.HRconv1(fea)) elif self.scale == 4: fea = self.upconv1( - F.interpolate(fea, scale_factor=2, mode='nearest')) + F.interpolate( + fea, scale_factor=2, mode='nearest')) fea = self.lrelu(self.att1(fea)) fea = self.lrelu(self.HRconv1(fea)) fea = self.upconv2( - F.interpolate(fea, scale_factor=2, mode='nearest')) + F.interpolate( + fea, scale_factor=2, mode='nearest')) fea = self.lrelu(self.att2(fea)) fea = self.lrelu(self.HRconv2(fea)) out = self.conv_last(fea) - ILR = F.interpolate(x, - scale_factor=self.scale, - mode='bilinear', - align_corners=False) + ILR = F.interpolate( + x, scale_factor=self.scale, mode='bilinear', align_corners=False) out = out + ILR return out diff --git a/paddlers/models/ppgan/models/generators/remaster.py b/paddlers/models/ppgan/models/generators/remaster.py index 80340216..ce64377e 100644 --- a/paddlers/models/ppgan/models/generators/remaster.py +++ b/paddlers/models/ppgan/models/generators/remaster.py @@ -25,11 +25,12 @@ def __init__(self, stride=(1, 1, 1), padding=(0, 1, 1)): super(TempConv, self).__init__() - self.conv3d = nn.Conv3D(in_planes, - out_planes, - kernel_size=kernel_size, - stride=stride, - padding=padding) + self.conv3d = nn.Conv3D( + in_planes, + out_planes, + kernel_size=kernel_size, + stride=stride, + padding=padding) self.bn = nn.BatchNorm(out_planes) def forward(self, x): @@ -40,11 +41,12 @@ class Upsample(nn.Layer): def __init__(self, in_planes, out_planes, scale_factor=(1, 2, 2)): super(Upsample, self).__init__() self.scale_factor = scale_factor - self.conv3d = nn.Conv3D(in_planes, - out_planes, - kernel_size=(3, 3, 3), - stride=(1, 1, 1), - padding=(1, 1, 1)) + self.conv3d = nn.Conv3D( + in_planes, + out_planes, + kernel_size=(3, 3, 3), + stride=(1, 1, 1), + padding=(1, 1, 1)) self.bn = nn.BatchNorm(out_planes) def forward(self, x): @@ -55,22 +57,24 @@ def forward(self, x): return F.elu( self.bn( self.conv3d( - F.interpolate(x, - size=out_size, - mode='trilinear', - align_corners=False, - data_format='NCDHW', - align_mode=0)))) + F.interpolate( + x, + size=out_size, + mode='trilinear', + align_corners=False, + data_format='NCDHW', + align_mode=0)))) class UpsampleConcat(nn.Layer): def __init__(self, in_planes_up, in_planes_flat, out_planes): 
super(UpsampleConcat, self).__init__() - self.conv3d = TempConv(in_planes_up + in_planes_flat, - out_planes, - kernel_size=(3, 3, 3), - stride=(1, 1, 1), - padding=(1, 1, 1)) + self.conv3d = TempConv( + in_planes_up + in_planes_flat, + out_planes, + kernel_size=(3, 3, 3), + stride=(1, 1, 1), + padding=(1, 1, 1)) def forward(self, x1, x2): scale_factor = (1, 2, 2) @@ -78,12 +82,13 @@ def forward(self, x1, x2): for i in range(3): out_size[i] = scale_factor[i] * out_size[i] - x1 = F.interpolate(x1, - size=out_size, - mode='trilinear', - align_corners=False, - data_format='NCDHW', - align_mode=0) + x1 = F.interpolate( + x1, + size=out_size, + mode='trilinear', + align_corners=False, + data_format='NCDHW', + align_mode=0) x = paddle.concat([x1, x2], axis=1) return self.conv3d(x) @@ -98,17 +103,19 @@ class SourceReferenceAttention(nn.Layer): in_planes_r (int): Number of input reference feature vector channels. """ + def __init__(self, in_planes_s, in_planes_r): super(SourceReferenceAttention, self).__init__() - self.query_conv = nn.Conv3D(in_channels=in_planes_s, - out_channels=in_planes_s // 8, - kernel_size=1) - self.key_conv = nn.Conv3D(in_channels=in_planes_r, - out_channels=in_planes_r // 8, - kernel_size=1) - self.value_conv = nn.Conv3D(in_channels=in_planes_r, - out_channels=in_planes_r, - kernel_size=1) + self.query_conv = nn.Conv3D( + in_channels=in_planes_s, + out_channels=in_planes_s // 8, + kernel_size=1) + self.key_conv = nn.Conv3D( + in_channels=in_planes_r, + out_channels=in_planes_r // 8, + kernel_size=1) + self.value_conv = nn.Conv3D( + in_channels=in_planes_r, out_channels=in_planes_r, kernel_size=1) self.gamma = self.create_parameter( shape=[1], dtype=self.query_conv.weight.dtype, @@ -118,16 +125,16 @@ def forward(self, source, reference): s_batchsize, sC, sT, sH, sW = source.shape r_batchsize, rC, rT, rH, rW = reference.shape - proj_query = paddle.reshape(self.query_conv(source), - [s_batchsize, -1, sT * sH * sW]) + proj_query = paddle.reshape( + self.query_conv(source), [s_batchsize, -1, sT * sH * sW]) proj_query = paddle.transpose(proj_query, [0, 2, 1]) - proj_key = paddle.reshape(self.key_conv(reference), - [r_batchsize, -1, rT * rW * rH]) + proj_key = paddle.reshape( + self.key_conv(reference), [r_batchsize, -1, rT * rW * rH]) energy = paddle.bmm(proj_query, proj_key) attention = F.softmax(energy) - proj_value = paddle.reshape(self.value_conv(reference), - [r_batchsize, -1, rT * rH * rW]) + proj_value = paddle.reshape( + self.value_conv(reference), [r_batchsize, -1, rT * rH * rW]) out = paddle.bmm(proj_value, paddle.transpose(attention, [0, 2, 1])) out = paddle.reshape(out, [s_batchsize, sC, sT, sH, sW]) @@ -140,32 +147,44 @@ def __init__(self): super(NetworkR, self).__init__() self.layers = nn.Sequential( - nn.Pad3D((1, 1, 1, 1, 1, 1), mode='replicate'), - TempConv(1, - 64, - kernel_size=(3, 3, 3), - stride=(1, 2, 2), - padding=(0, 0, 0)), - TempConv(64, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1)), - TempConv(128, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1)), - TempConv(128, - 256, - kernel_size=(3, 3, 3), - stride=(1, 2, 2), - padding=(1, 1, 1)), - TempConv(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)), - TempConv(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)), - TempConv(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)), - TempConv(256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)), + nn.Pad3D( + (1, 1, 1, 1, 1, 1), mode='replicate'), + TempConv( + 1, + 64, + kernel_size=(3, 3, 3), + stride=(1, 2, 2), + padding=(0, 0, 0)), + TempConv( + 64, 128, 
kernel_size=(3, 3, 3), padding=(1, 1, 1)), + TempConv( + 128, 128, kernel_size=(3, 3, 3), padding=(1, 1, 1)), + TempConv( + 128, + 256, + kernel_size=(3, 3, 3), + stride=(1, 2, 2), + padding=(1, 1, 1)), + TempConv( + 256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)), + TempConv( + 256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)), + TempConv( + 256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)), + TempConv( + 256, 256, kernel_size=(3, 3, 3), padding=(1, 1, 1)), Upsample(256, 128), - TempConv(128, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1)), - TempConv(64, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1)), + TempConv( + 128, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1)), + TempConv( + 64, 64, kernel_size=(3, 3, 3), padding=(1, 1, 1)), Upsample(64, 16), - nn.Conv3D(16, - 1, - kernel_size=(3, 3, 3), - stride=(1, 1, 1), - padding=(1, 1, 1))) + nn.Conv3D( + 16, + 1, + kernel_size=(3, 3, 3), + stride=(1, 1, 1), + padding=(1, 1, 1))) def forward(self, x): return paddle.clip( @@ -177,17 +196,25 @@ def __init__(self): super(NetworkC, self).__init__() self.down1 = nn.Sequential( - nn.Pad3D((1, 1, 1, 1, 0, 0), mode='replicate'), - TempConv(1, 64, stride=(1, 2, 2), padding=(0, 0, 0)), - TempConv(64, 128), TempConv(128, 128), - TempConv(128, 256, stride=(1, 2, 2)), TempConv(256, 256), - TempConv(256, 256), TempConv(256, 512, stride=(1, 2, 2)), - TempConv(512, 512), TempConv(512, 512)) + nn.Pad3D( + (1, 1, 1, 1, 0, 0), mode='replicate'), + TempConv( + 1, 64, stride=(1, 2, 2), padding=(0, 0, 0)), + TempConv(64, 128), + TempConv(128, 128), + TempConv( + 128, 256, stride=(1, 2, 2)), + TempConv(256, 256), + TempConv(256, 256), + TempConv( + 256, 512, stride=(1, 2, 2)), + TempConv(512, 512), + TempConv(512, 512)) self.flat = nn.Sequential(TempConv(512, 512), TempConv(512, 512)) self.down2 = nn.Sequential( - TempConv(512, 512, stride=(1, 2, 2)), - TempConv(512, 512), - ) + TempConv( + 512, 512, stride=(1, 2, 2)), + TempConv(512, 512), ) self.stattn1 = SourceReferenceAttention( 512, 512) # Source-Reference Attention self.stattn2 = SourceReferenceAttention( @@ -196,48 +223,54 @@ def __init__(self): self.conv1 = TempConv(512, 512) self.up1 = UpsampleConcat(512, 512, 512) # 1/8 self.selfattn2 = SourceReferenceAttention(512, 512) # Self Attention - self.conv2 = TempConv(512, - 256, - kernel_size=(3, 3, 3), - stride=(1, 1, 1), - padding=(1, 1, 1)) + self.conv2 = TempConv( + 512, + 256, + kernel_size=(3, 3, 3), + stride=(1, 1, 1), + padding=(1, 1, 1)) self.up2 = nn.Sequential( Upsample(256, 128), # 1/4 - TempConv(128, - 64, - kernel_size=(3, 3, 3), - stride=(1, 1, 1), - padding=(1, 1, 1))) + TempConv( + 128, + 64, + kernel_size=(3, 3, 3), + stride=(1, 1, 1), + padding=(1, 1, 1))) self.up3 = nn.Sequential( Upsample(64, 32), # 1/2 - TempConv(32, - 16, - kernel_size=(3, 3, 3), - stride=(1, 1, 1), - padding=(1, 1, 1))) + TempConv( + 32, + 16, + kernel_size=(3, 3, 3), + stride=(1, 1, 1), + padding=(1, 1, 1))) self.up4 = nn.Sequential( Upsample(16, 8), # 1/1 - nn.Conv3D(8, - 2, - kernel_size=(3, 3, 3), - stride=(1, 1, 1), - padding=(1, 1, 1))) + nn.Conv3D( + 8, + 2, + kernel_size=(3, 3, 3), + stride=(1, 1, 1), + padding=(1, 1, 1))) self.reffeatnet1 = nn.Sequential( - TempConv(3, 64, stride=(1, 2, 2)), + TempConv( + 3, 64, stride=(1, 2, 2)), TempConv(64, 128), TempConv(128, 128), - TempConv(128, 256, stride=(1, 2, 2)), + TempConv( + 128, 256, stride=(1, 2, 2)), TempConv(256, 256), TempConv(256, 256), - TempConv(256, 512, stride=(1, 2, 2)), + TempConv( + 256, 512, stride=(1, 2, 2)), TempConv(512, 512), - TempConv(512, 
512), - ) + TempConv(512, 512), ) self.reffeatnet2 = nn.Sequential( - TempConv(512, 512, stride=(1, 2, 2)), - TempConv(512, 512), + TempConv( + 512, 512, stride=(1, 2, 2)), TempConv(512, 512), - ) + TempConv(512, 512), ) def forward(self, x, x_refs=None): x1 = self.down1(x - 0.4462414) diff --git a/paddlers/models/ppgan/models/generators/resnet.py b/paddlers/models/ppgan/models/generators/resnet.py index 3c30d1ae..9ce93c37 100644 --- a/paddlers/models/ppgan/models/generators/resnet.py +++ b/paddlers/models/ppgan/models/generators/resnet.py @@ -39,6 +39,7 @@ class ResnetGenerator(nn.Layer): padding_type (str): the name of padding layer in conv layers: reflect | replicate | zero """ + def __init__(self, input_nc, output_nc, @@ -58,52 +59,48 @@ def __init__(self, use_bias = norm_layer == nn.InstanceNorm2D model = [ - nn.Pad2D(padding=[3, 3, 3, 3], mode="reflect"), - nn.Conv2D(input_nc, - ngf, - kernel_size=7, - padding=0, - bias_attr=use_bias), - norm_layer(ngf), - nn.ReLU() + nn.Pad2D( + padding=[3, 3, 3, 3], mode="reflect"), nn.Conv2D( + input_nc, ngf, kernel_size=7, padding=0, + bias_attr=use_bias), norm_layer(ngf), nn.ReLU() ] n_downsampling = 2 for i in range(n_downsampling): # add downsampling layers mult = 2**i model += [ - nn.Conv2D(ngf * mult, - ngf * mult * 2, - kernel_size=3, - stride=2, - padding=1, - bias_attr=use_bias), - norm_layer(ngf * mult * 2), - nn.ReLU() + nn.Conv2D( + ngf * mult, + ngf * mult * 2, + kernel_size=3, + stride=2, + padding=1, + bias_attr=use_bias), norm_layer(ngf * mult * 2), nn.ReLU() ] mult = 2**n_downsampling for i in range(n_blocks): # add ResNet blocks model += [ - ResnetBlock(ngf * mult, - padding_type=padding_type, - norm_layer=norm_layer, - use_dropout=use_dropout, - use_bias=use_bias) + ResnetBlock( + ngf * mult, + padding_type=padding_type, + norm_layer=norm_layer, + use_dropout=use_dropout, + use_bias=use_bias) ] for i in range(n_downsampling): # add upsampling layers mult = 2**(n_downsampling - i) model += [ - nn.Conv2DTranspose(ngf * mult, - int(ngf * mult / 2), - kernel_size=3, - stride=2, - padding=1, - output_padding=1, - bias_attr=use_bias), - norm_layer(int(ngf * mult / 2)), + nn.Conv2DTranspose( + ngf * mult, + int(ngf * mult / 2), + kernel_size=3, + stride=2, + padding=1, + output_padding=1, + bias_attr=use_bias), norm_layer(int(ngf * mult / 2)), nn.ReLU() ] model += [nn.Pad2D(padding=[3, 3, 3, 3], mode="reflect")] @@ -119,6 +116,7 @@ def forward(self, x): class ResnetBlock(nn.Layer): """Define a Resnet block""" + def __init__(self, dim, padding_type, norm_layer, use_dropout, use_bias): """Initialize the Resnet block @@ -155,9 +153,9 @@ def build_conv_block(self, dim, padding_type, norm_layer, use_dropout, padding_type) conv_block += [ - nn.Conv2D(dim, dim, kernel_size=3, padding=p, bias_attr=use_bias), - norm_layer(dim), - nn.ReLU() + nn.Conv2D( + dim, dim, kernel_size=3, padding=p, bias_attr=use_bias), + norm_layer(dim), nn.ReLU() ] if use_dropout: conv_block += [nn.Dropout(0.5)] @@ -171,7 +169,8 @@ def build_conv_block(self, dim, padding_type, norm_layer, use_dropout, raise NotImplementedError('padding [%s] is not implemented' % padding_type) conv_block += [ - nn.Conv2D(dim, dim, kernel_size=3, padding=p, bias_attr=use_bias), + nn.Conv2D( + dim, dim, kernel_size=3, padding=p, bias_attr=use_bias), norm_layer(dim) ] diff --git a/paddlers/models/ppgan/models/generators/rrdb_net.py b/paddlers/models/ppgan/models/generators/rrdb_net.py index f69d42b3..639faa2d 100644 --- a/paddlers/models/ppgan/models/generators/rrdb_net.py +++ 
b/paddlers/models/ppgan/models/generators/rrdb_net.py @@ -44,6 +44,7 @@ def forward(self, x): class RRDB(nn.Layer): '''Residual in Residual Dense Block''' + def __init__(self, nf, gc=32): super(RRDB, self).__init__() self.RDB1 = ResidualDenseBlock_5C(nf, gc) @@ -87,9 +88,11 @@ def forward(self, x): fea = fea + trunk fea = self.lrelu( - self.upconv1(F.interpolate(fea, scale_factor=2, mode='nearest'))) + self.upconv1(F.interpolate( + fea, scale_factor=2, mode='nearest'))) fea = self.lrelu( - self.upconv2(F.interpolate(fea, scale_factor=2, mode='nearest'))) + self.upconv2(F.interpolate( + fea, scale_factor=2, mode='nearest'))) out = self.conv_last(self.lrelu(self.HRconv(fea))) return out diff --git a/paddlers/models/ppgan/models/generators/unet.py b/paddlers/models/ppgan/models/generators/unet.py index f8c2a1b1..bb670bdd 100644 --- a/paddlers/models/ppgan/models/generators/unet.py +++ b/paddlers/models/ppgan/models/generators/unet.py @@ -24,6 +24,7 @@ @GENERATORS.register() class UnetGenerator(nn.Layer): """Create a Unet-based generator""" + def __init__(self, input_nc, output_nc, @@ -57,28 +58,32 @@ def __init__(self, innermost=True) # add the innermost layer for i in range(num_downs - 5): # add intermediate layers with ngf * 8 filters - unet_block = UnetSkipConnectionBlock(ngf * 8, - ngf * 8, - input_nc=None, - submodule=unet_block, - norm_layer=norm_layer, - use_dropout=use_dropout) + unet_block = UnetSkipConnectionBlock( + ngf * 8, + ngf * 8, + input_nc=None, + submodule=unet_block, + norm_layer=norm_layer, + use_dropout=use_dropout) # gradually reduce the number of filters from ngf * 8 to ngf - unet_block = UnetSkipConnectionBlock(ngf * 4, - ngf * 8, - input_nc=None, - submodule=unet_block, - norm_layer=norm_layer) - unet_block = UnetSkipConnectionBlock(ngf * 2, - ngf * 4, - input_nc=None, - submodule=unet_block, - norm_layer=norm_layer) - unet_block = UnetSkipConnectionBlock(ngf, - ngf * 2, - input_nc=None, - submodule=unet_block, - norm_layer=norm_layer) + unet_block = UnetSkipConnectionBlock( + ngf * 4, + ngf * 8, + input_nc=None, + submodule=unet_block, + norm_layer=norm_layer) + unet_block = UnetSkipConnectionBlock( + ngf * 2, + ngf * 4, + input_nc=None, + submodule=unet_block, + norm_layer=norm_layer) + unet_block = UnetSkipConnectionBlock( + ngf, + ngf * 2, + input_nc=None, + submodule=unet_block, + norm_layer=norm_layer) self.model = UnetSkipConnectionBlock( output_nc, ngf, @@ -97,6 +102,7 @@ class UnetSkipConnectionBlock(nn.Layer): X -------------------identity---------------------- |-- downsampling -- |submodule| -- upsampling --| """ + def __init__(self, outer_nc, inner_nc, @@ -126,43 +132,43 @@ def __init__(self, use_bias = norm_layer == nn.InstanceNorm2D if input_nc is None: input_nc = outer_nc - downconv = nn.Conv2D(input_nc, - inner_nc, - kernel_size=4, - stride=2, - padding=1, - bias_attr=use_bias) + downconv = nn.Conv2D( + input_nc, + inner_nc, + kernel_size=4, + stride=2, + padding=1, + bias_attr=use_bias) downrelu = nn.LeakyReLU(0.2) downnorm = norm_layer(inner_nc) uprelu = nn.ReLU() upnorm = norm_layer(outer_nc) if outermost: - upconv = nn.Conv2DTranspose(inner_nc * 2, - outer_nc, - kernel_size=4, - stride=2, - padding=1) + upconv = nn.Conv2DTranspose( + inner_nc * 2, outer_nc, kernel_size=4, stride=2, padding=1) down = [downconv] up = [uprelu, upconv, nn.Tanh()] model = down + [submodule] + up elif innermost: - upconv = nn.Conv2DTranspose(inner_nc, - outer_nc, - kernel_size=4, - stride=2, - padding=1, - bias_attr=use_bias) + upconv = nn.Conv2DTranspose( + inner_nc, + 
outer_nc, + kernel_size=4, + stride=2, + padding=1, + bias_attr=use_bias) down = [downrelu, downconv] up = [uprelu, upconv, upnorm] model = down + up else: - upconv = nn.Conv2DTranspose(inner_nc * 2, - outer_nc, - kernel_size=4, - stride=2, - padding=1, - bias_attr=use_bias) + upconv = nn.Conv2DTranspose( + inner_nc * 2, + outer_nc, + kernel_size=4, + stride=2, + padding=1, + bias_attr=use_bias) down = [downrelu, downconv, downnorm] up = [uprelu, upconv, upnorm] diff --git a/paddlers/models/ppgan/models/generators/wav2lip.py b/paddlers/models/ppgan/models/generators/wav2lip.py index 5c8b0c94..33ee0f87 100644 --- a/paddlers/models/ppgan/models/generators/wav2lip.py +++ b/paddlers/models/ppgan/models/generators/wav2lip.py @@ -16,271 +16,282 @@ class Wav2Lip(nn.Layer): def __init__(self): super(Wav2Lip, self).__init__() - self.face_encoder_blocks = nn.LayerList([ - nn.Sequential(ConvBNRelu(6, 16, kernel_size=7, stride=1, - padding=3)), - nn.Sequential( - ConvBNRelu(16, 32, kernel_size=3, stride=2, padding=1), - ConvBNRelu(32, - 32, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(32, - 32, - kernel_size=3, - stride=1, - padding=1, - residual=True)), - nn.Sequential( - ConvBNRelu(32, 64, kernel_size=3, stride=2, padding=1), - ConvBNRelu(64, - 64, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(64, - 64, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(64, - 64, - kernel_size=3, - stride=1, - padding=1, - residual=True)), - nn.Sequential( - ConvBNRelu(64, 128, kernel_size=3, stride=2, padding=1), - ConvBNRelu(128, - 128, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(128, - 128, - kernel_size=3, - stride=1, - padding=1, - residual=True)), - nn.Sequential( - ConvBNRelu(128, 256, kernel_size=3, stride=2, padding=1), - ConvBNRelu(256, - 256, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(256, - 256, - kernel_size=3, - stride=1, - padding=1, - residual=True)), - nn.Sequential( - ConvBNRelu(256, 512, kernel_size=3, stride=2, padding=1), - ConvBNRelu(512, - 512, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ), - nn.Sequential( - ConvBNRelu(512, 512, kernel_size=3, stride=1, padding=0), - ConvBNRelu(512, 512, kernel_size=1, stride=1, padding=0)), - ]) + self.face_encoder_blocks = nn.LayerList( + [ + nn.Sequential( + ConvBNRelu( + 6, 16, kernel_size=7, stride=1, padding=3)), + nn.Sequential( + ConvBNRelu( + 16, 32, kernel_size=3, stride=2, padding=1), + ConvBNRelu( + 32, + 32, + kernel_size=3, + stride=1, + padding=1, + residual=True), + ConvBNRelu( + 32, + 32, + kernel_size=3, + stride=1, + padding=1, + residual=True)), + nn.Sequential( + ConvBNRelu( + 32, 64, kernel_size=3, stride=2, padding=1), + ConvBNRelu( + 64, + 64, + kernel_size=3, + stride=1, + padding=1, + residual=True), + ConvBNRelu( + 64, + 64, + kernel_size=3, + stride=1, + padding=1, + residual=True), + ConvBNRelu( + 64, + 64, + kernel_size=3, + stride=1, + padding=1, + residual=True)), + nn.Sequential( + ConvBNRelu( + 64, 128, kernel_size=3, stride=2, padding=1), + ConvBNRelu( + 128, + 128, + kernel_size=3, + stride=1, + padding=1, + residual=True), + ConvBNRelu( + 128, + 128, + kernel_size=3, + stride=1, + padding=1, + residual=True)), + nn.Sequential( + ConvBNRelu( + 128, 256, kernel_size=3, stride=2, padding=1), + ConvBNRelu( + 256, + 256, + kernel_size=3, + stride=1, + padding=1, + residual=True), + ConvBNRelu( + 256, + 256, + kernel_size=3, + stride=1, + padding=1, + residual=True)), + 
nn.Sequential( + ConvBNRelu( + 256, 512, kernel_size=3, stride=2, padding=1), + ConvBNRelu( + 512, + 512, + kernel_size=3, + stride=1, + padding=1, + residual=True), ), + nn.Sequential( + ConvBNRelu( + 512, 512, kernel_size=3, stride=1, padding=0), + ConvBNRelu( + 512, 512, kernel_size=1, stride=1, padding=0)), + ]) self.audio_encoder = nn.Sequential( - ConvBNRelu(1, 32, kernel_size=3, stride=1, padding=1), - ConvBNRelu(32, - 32, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(32, - 32, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(32, 64, kernel_size=3, stride=(3, 1), padding=1), - ConvBNRelu(64, - 64, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(64, - 64, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(64, 128, kernel_size=3, stride=3, padding=1), - ConvBNRelu(128, - 128, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(128, - 128, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(128, 256, kernel_size=3, stride=(3, 2), padding=1), - ConvBNRelu(256, - 256, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(256, 512, kernel_size=3, stride=1, padding=0), - ConvBNRelu(512, 512, kernel_size=1, stride=1, padding=0), - ) + ConvBNRelu( + 1, 32, kernel_size=3, stride=1, padding=1), + ConvBNRelu( + 32, 32, kernel_size=3, stride=1, padding=1, residual=True), + ConvBNRelu( + 32, 32, kernel_size=3, stride=1, padding=1, residual=True), + ConvBNRelu( + 32, 64, kernel_size=3, stride=(3, 1), padding=1), + ConvBNRelu( + 64, 64, kernel_size=3, stride=1, padding=1, residual=True), + ConvBNRelu( + 64, 64, kernel_size=3, stride=1, padding=1, residual=True), + ConvBNRelu( + 64, 128, kernel_size=3, stride=3, padding=1), + ConvBNRelu( + 128, 128, kernel_size=3, stride=1, padding=1, residual=True), + ConvBNRelu( + 128, 128, kernel_size=3, stride=1, padding=1, residual=True), + ConvBNRelu( + 128, 256, kernel_size=3, stride=(3, 2), padding=1), + ConvBNRelu( + 256, 256, kernel_size=3, stride=1, padding=1, residual=True), + ConvBNRelu( + 256, 512, kernel_size=3, stride=1, padding=0), + ConvBNRelu( + 512, 512, kernel_size=1, stride=1, padding=0), ) - self.face_decoder_blocks = nn.LayerList([ - nn.Sequential( - ConvBNRelu(512, 512, kernel_size=1, stride=1, padding=0), ), - nn.Sequential( - Conv2dTransposeRelu(1024, - 512, - kernel_size=3, - stride=1, - padding=0), - ConvBNRelu(512, - 512, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ), - nn.Sequential( - Conv2dTransposeRelu(1024, - 512, - kernel_size=3, - stride=2, - padding=1, - output_padding=1), - ConvBNRelu(512, - 512, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(512, - 512, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ), - nn.Sequential( - Conv2dTransposeRelu(768, - 384, - kernel_size=3, - stride=2, - padding=1, - output_padding=1), - ConvBNRelu(384, - 384, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(384, - 384, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ), - nn.Sequential( - Conv2dTransposeRelu(512, - 256, - kernel_size=3, - stride=2, - padding=1, - output_padding=1), - ConvBNRelu(256, - 256, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(256, - 256, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ), - nn.Sequential( - Conv2dTransposeRelu(320, - 128, - kernel_size=3, - stride=2, - padding=1, - output_padding=1), - ConvBNRelu(128, - 128, - 
kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(128, - 128, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ), - nn.Sequential( - Conv2dTransposeRelu(160, - 64, - kernel_size=3, - stride=2, - padding=1, - output_padding=1), - ConvBNRelu(64, - 64, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ConvBNRelu(64, - 64, - kernel_size=3, - stride=1, - padding=1, - residual=True), - ), - ]) + self.face_decoder_blocks = nn.LayerList( + [ + nn.Sequential( + ConvBNRelu( + 512, 512, kernel_size=1, stride=1, padding=0), ), + nn.Sequential( + Conv2dTransposeRelu( + 1024, 512, kernel_size=3, stride=1, padding=0), + ConvBNRelu( + 512, + 512, + kernel_size=3, + stride=1, + padding=1, + residual=True), ), + nn.Sequential( + Conv2dTransposeRelu( + 1024, + 512, + kernel_size=3, + stride=2, + padding=1, + output_padding=1), + ConvBNRelu( + 512, + 512, + kernel_size=3, + stride=1, + padding=1, + residual=True), + ConvBNRelu( + 512, + 512, + kernel_size=3, + stride=1, + padding=1, + residual=True), ), + nn.Sequential( + Conv2dTransposeRelu( + 768, + 384, + kernel_size=3, + stride=2, + padding=1, + output_padding=1), + ConvBNRelu( + 384, + 384, + kernel_size=3, + stride=1, + padding=1, + residual=True), + ConvBNRelu( + 384, + 384, + kernel_size=3, + stride=1, + padding=1, + residual=True), ), + nn.Sequential( + Conv2dTransposeRelu( + 512, + 256, + kernel_size=3, + stride=2, + padding=1, + output_padding=1), + ConvBNRelu( + 256, + 256, + kernel_size=3, + stride=1, + padding=1, + residual=True), + ConvBNRelu( + 256, + 256, + kernel_size=3, + stride=1, + padding=1, + residual=True), ), + nn.Sequential( + Conv2dTransposeRelu( + 320, + 128, + kernel_size=3, + stride=2, + padding=1, + output_padding=1), + ConvBNRelu( + 128, + 128, + kernel_size=3, + stride=1, + padding=1, + residual=True), + ConvBNRelu( + 128, + 128, + kernel_size=3, + stride=1, + padding=1, + residual=True), ), + nn.Sequential( + Conv2dTransposeRelu( + 160, + 64, + kernel_size=3, + stride=2, + padding=1, + output_padding=1), + ConvBNRelu( + 64, + 64, + kernel_size=3, + stride=1, + padding=1, + residual=True), + ConvBNRelu( + 64, + 64, + kernel_size=3, + stride=1, + padding=1, + residual=True), ), + ]) self.output_block = nn.Sequential( - ConvBNRelu(80, 32, kernel_size=3, stride=1, padding=1), - nn.Conv2D(32, 3, kernel_size=1, stride=1, padding=0), nn.Sigmoid()) + ConvBNRelu( + 80, 32, kernel_size=3, stride=1, padding=1), + nn.Conv2D( + 32, 3, kernel_size=1, stride=1, padding=0), + nn.Sigmoid()) def forward(self, audio_sequences, face_sequences): B = audio_sequences.shape[0] input_dim_size = len(face_sequences.shape) if input_dim_size > 4: - audio_sequences = paddle.concat([ - audio_sequences[:, i] for i in range(audio_sequences.shape[1]) - ], - axis=0) - face_sequences = paddle.concat([ - face_sequences[:, :, i] for i in range(face_sequences.shape[2]) - ], - axis=0) + audio_sequences = paddle.concat( + [ + audio_sequences[:, i] + for i in range(audio_sequences.shape[1]) + ], + axis=0) + face_sequences = paddle.concat( + [ + face_sequences[:, :, i] + for i in range(face_sequences.shape[2]) + ], + axis=0) audio_embedding = self.audio_encoder(audio_sequences) diff --git a/paddlers/models/ppgan/models/lapstyle_model.py b/paddlers/models/ppgan/models/lapstyle_model.py index bf090ea4..b8985130 100644 --- a/paddlers/models/ppgan/models/lapstyle_model.py +++ b/paddlers/models/ppgan/models/lapstyle_model.py @@ -76,9 +76,8 @@ def backward_Dec(self): """content loss""" self.loss_c = 0 for layer in 
self.content_layers: - self.loss_c += self.calc_content_loss(self.tF[layer], - self.cF[layer], - norm=True) + self.loss_c += self.calc_content_loss( + self.tF[layer], self.cF[layer], norm=True) self.losses['loss_c'] = self.loss_c """style loss""" self.loss_s = 0 @@ -242,9 +241,8 @@ def backward_G(self): """content loss""" self.loss_c = 0 for layer in self.content_layers: - self.loss_c += self.calc_content_loss(self.tF[layer], - self.cF[layer], - norm=True) + self.loss_c += self.calc_content_loss( + self.tF[layer], self.cF[layer], norm=True) self.losses['loss_c'] = self.loss_c """style loss""" self.loss_s = 0 @@ -389,9 +387,8 @@ def backward_G(self): """content loss""" self.loss_c = 0 for layer in self.content_layers: - self.loss_c += self.calc_content_loss(self.tF[layer], - self.cF[layer], - norm=True) + self.loss_c += self.calc_content_loss( + self.tF[layer], self.cF[layer], norm=True) self.losses['loss_c'] = self.loss_c """style loss""" self.loss_s = 0 @@ -401,8 +398,8 @@ def backward_G(self): """relative loss""" self.loss_style_remd = self.calc_style_emd_loss(self.tF['r41'], self.sF['r41']) - self.loss_content_relt = self.calc_content_relt_loss( - self.tF['r41'], self.cF['r41']) + self.loss_content_relt = self.calc_content_relt_loss(self.tF['r41'], + self.cF['r41']) self.losses['loss_style_remd'] = self.loss_style_remd self.losses['loss_content_relt'] = self.loss_content_relt """gan loss""" diff --git a/paddlers/models/ppgan/models/mpr_model.py b/paddlers/models/ppgan/models/mpr_model.py index d88e8f11..fdeea4ff 100644 --- a/paddlers/models/ppgan/models/mpr_model.py +++ b/paddlers/models/ppgan/models/mpr_model.py @@ -29,6 +29,7 @@ class MPRModel(BaseModel): Paper: MPR: Multi-Stage Progressive Image Restoration (CVPR 2021). https://arxiv.org/abs/2102.02808 """ + def __init__(self, generator, char_criterion=None, edge_criterion=None): """Initialize the MPR class. diff --git a/paddlers/models/ppgan/models/msvsr_model.py b/paddlers/models/ppgan/models/msvsr_model.py index 3ee6fbd3..e51dd277 100644 --- a/paddlers/models/ppgan/models/msvsr_model.py +++ b/paddlers/models/ppgan/models/msvsr_model.py @@ -30,6 +30,7 @@ class MultiStageVSRModel(BaseSRModel): Paper: PP-MSVSR: Multi-Stage Video Super-Resolution, 2021 """ + def __init__(self, generator, fix_iter, pixel_criterion=None): """Initialize the PP-MSVSR class. 
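For reference, the hinge-style GAN losses that the photopen_model.py hunks below reformat reduce to the following minimal standalone sketch. Function names are illustrative only (not part of the patch), and raw discriminator scores are assumed as input:

import paddle

def hinge_g_loss(pred_fake):
    # Generator term, as in backward_G: raise the scores of fake samples.
    return -pred_fake.mean()

def hinge_d_loss(pred_fake, pred_real):
    # Discriminator terms, as in backward_D: push real scores above +1 and
    # fake scores below -1; only margin violations contribute, linearly.
    loss_fake = -paddle.minimum(-pred_fake - 1.,
                                paddle.zeros_like(pred_fake)).mean()
    loss_real = -paddle.minimum(pred_real - 1.,
                                paddle.zeros_like(pred_real)).mean()
    return loss_fake + loss_real
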
diff --git a/paddlers/models/ppgan/models/photopen_model.py b/paddlers/models/ppgan/models/photopen_model.py index af1fab4e..95efad43 100644 --- a/paddlers/models/ppgan/models/photopen_model.py +++ b/paddlers/models/ppgan/models/photopen_model.py @@ -28,39 +28,38 @@ @MODELS.register() class PhotoPenModel(BaseModel): - def __init__(self, - generator, - discriminator, - criterion, - label_nc, - contain_dontcare_label, - batchSize, - crop_size, - lambda_feat, - ): + def __init__( + self, + generator, + discriminator, + criterion, + label_nc, + contain_dontcare_label, + batchSize, + crop_size, + lambda_feat, ): super(PhotoPenModel, self).__init__() - + opt = { - 'label_nc': label_nc, - 'contain_dontcare_label': contain_dontcare_label, - 'batchSize': batchSize, - 'crop_size': crop_size, - 'lambda_feat': lambda_feat, -# 'semantic_nc': semantic_nc, -# 'use_vae': use_vae, -# 'nef': nef, - } + 'label_nc': label_nc, + 'contain_dontcare_label': contain_dontcare_label, + 'batchSize': batchSize, + 'crop_size': crop_size, + 'lambda_feat': lambda_feat, + # 'semantic_nc': semantic_nc, + # 'use_vae': use_vae, + # 'nef': nef, + } self.opt = Dict(opt) - - + # define nets self.nets['net_gen'] = build_generator(generator) -# init_weights(self.nets['net_gen']) + # init_weights(self.nets['net_gen']) self.nets['net_des'] = build_discriminator(discriminator) -# init_weights(self.nets['net_des']) + # init_weights(self.nets['net_des']) self.net_vgg = build_criterion(criterion) - + def setup_input(self, input): if 'img' in input.keys(): self.img = paddle.to_tensor(input['img']) @@ -77,48 +76,49 @@ def backward_G(self): real_data = paddle.concat((self.one_hot, self.img), 1) fake_and_real_data = paddle.concat((fake_data, real_data), 0) pred = self.nets['net_des'](fake_and_real_data) - """content loss""" g_ganloss = 0. for i in range(len(pred)): pred_i = pred[i][-1][:self.opt.batchSize] - new_loss = -pred_i.mean() # hinge loss + new_loss = -pred_i.mean() # hinge loss g_ganloss += new_loss g_ganloss /= len(pred) g_featloss = 0. for i in range(len(pred)): - for j in range(len(pred[i]) - 1): # 除去最后一层的中间层featuremap - unweighted_loss = (pred[i][j][:self.opt.batchSize] - pred[i][j][self.opt.batchSize:]).abs().mean() # L1 loss + for j in range(len(pred[i]) - 1): # intermediate-layer feature maps, excluding the last layer + unweighted_loss = ( + pred[i][j][:self.opt.batchSize] - + pred[i][j][self.opt.batchSize:]).abs().mean() # L1 loss g_featloss += unweighted_loss * self.opt.lambda_feat / len(pred) - + g_vggloss = self.net_vgg(self.img, self.img_f) self.g_loss = g_ganloss + g_featloss + g_vggloss - + self.g_loss.backward() self.losses['g_ganloss'] = g_ganloss self.losses['g_featloss'] = g_featloss self.losses['g_vggloss'] = g_vggloss - def backward_D(self): fake_data = paddle.concat((self.one_hot, self.img_f), 1) real_data = paddle.concat((self.one_hot, self.img), 1) fake_and_real_data = paddle.concat((fake_data, real_data), 0) pred = self.nets['net_des'](fake_and_real_data) - """content loss""" df_ganloss = 0. for i in range(len(pred)): pred_i = pred[i][-1][:self.opt.batchSize] - new_loss = -paddle.minimum(-pred_i - 1, paddle.zeros_like(pred_i)).mean() # hingle loss + new_loss = -paddle.minimum( + -pred_i - 1, paddle.zeros_like(pred_i)).mean() # hinge loss df_ganloss += new_loss df_ganloss /= len(pred) dr_ganloss = 0.
for i in range(len(pred)): pred_i = pred[i][-1][self.opt.batchSize:] - new_loss = -paddle.minimum(pred_i - 1, paddle.zeros_like(pred_i)).mean() # hinge loss + new_loss = -paddle.minimum( + pred_i - 1, paddle.zeros_like(pred_i)).mean() # hinge loss dr_ganloss += new_loss dr_ganloss /= len(pred) @@ -126,19 +126,18 @@ def backward_D(self): self.d_loss.backward() self.losses['df_ganloss'] = df_ganloss self.losses['dr_ganloss'] = dr_ganloss - - + def train_iter(self, optimizers=None): self.forward() self.optimizers['optimG'].clear_grad() self.backward_G() self.optimizers['optimG'].step() - + self.forward() self.optimizers['optimD'].clear_grad() self.backward_D() self.optimizers['optimD'].step() - + def test_iter(self, metrics=None): self.eval() with paddle.no_grad(): @@ -159,7 +158,6 @@ def setup_optimizers(self, lr, cfg): lr = learning_rate * 4 else: lr = learning_rate - self.optimizers[opt_name] = build_optimizer( - cfg_, lr, parameters) + self.optimizers[opt_name] = build_optimizer(cfg_, lr, parameters) return self.optimizers diff --git a/paddlers/models/ppgan/models/pix2pix_model.py b/paddlers/models/ppgan/models/pix2pix_model.py index 2c8d5523..a816def2 100644 --- a/paddlers/models/ppgan/models/pix2pix_model.py +++ b/paddlers/models/ppgan/models/pix2pix_model.py @@ -31,6 +31,7 @@ class Pix2PixModel(BaseModel): pix2pix paper: https://arxiv.org/pdf/1611.07004.pdf """ + def __init__(self, generator, discriminator=None, diff --git a/paddlers/models/ppgan/models/sr_model.py b/paddlers/models/ppgan/models/sr_model.py index 767bf27e..8eebf61a 100644 --- a/paddlers/models/ppgan/models/sr_model.py +++ b/paddlers/models/ppgan/models/sr_model.py @@ -27,6 +27,7 @@ class BaseSRModel(BaseModel): """Base SR model for single image super-resolution. """ + def __init__(self, generator, pixel_criterion=None, use_init_weight=False): """ Args: @@ -85,8 +86,8 @@ def test_iter(self, metrics=None): def init_sr_weight(net): def reset_func(m): - if hasattr(m, 'weight') and (not isinstance( - m, (nn.BatchNorm, nn.BatchNorm2D))): + if hasattr(m, 'weight') and ( + not isinstance(m, (nn.BatchNorm, nn.BatchNorm2D))): reset_parameters(m) net.apply(reset_func) diff --git a/paddlers/models/ppgan/models/starganv2_model.py b/paddlers/models/ppgan/models/starganv2_model.py index f7d5e5e6..85acf128 100644 --- a/paddlers/models/ppgan/models/starganv2_model.py +++ b/paddlers/models/ppgan/models/starganv2_model.py @@ -88,11 +88,12 @@ def adv_loss(logits, target): def r1_reg(d_out, x_in): # zero-centered gradient penalty for real images batch_size = x_in.shape[0] - grad_dout = paddle.grad(outputs=d_out.sum(), - inputs=x_in, - create_graph=True, - retain_graph=True, - only_inputs=True)[0] + grad_dout = paddle.grad( + outputs=d_out.sum(), + inputs=x_in, + create_graph=True, + retain_graph=True, + only_inputs=True)[0] grad_dout2 = grad_dout.pow(2) assert (grad_dout2.shape == x_in.shape) reg = 0.5 * paddle.reshape(grad_dout2, (batch_size, -1)).sum(1).mean(0) @@ -108,8 +109,8 @@ def soft_update(source, target, beta=1.0): target_model_map = dict(target.named_parameters()) for param_name, source_param in source.named_parameters(): target_param = target_model_map[param_name] - target_param.set_value(beta * source_param + - (1.0 - beta) * target_param) + target_param.set_value(beta * source_param + (1.0 - beta) * + target_param) def dump_model(model): @@ -196,18 +197,17 @@ def he_init(module): @MODELS.register() class StarGANv2Model(BaseModel): def __init__( - self, - generator, - style=None, - mapping=None, - discriminator=None, -
fan=None, - latent_dim=16, - lambda_reg=1, - lambda_sty=1, - lambda_ds=1, - lambda_cyc=1, - ): + self, + generator, + style=None, + mapping=None, + discriminator=None, + fan=None, + latent_dim=16, + lambda_reg=1, + lambda_sty=1, + lambda_ds=1, + lambda_cyc=1, ): super(StarGANv2Model, self).__init__() self.w_hpf = generator['w_hpf'] self.nets_ema = {} @@ -277,69 +277,74 @@ def train_iter(self, optimizers=None): masks = None # train the discriminator - d_loss, d_losses_latent = compute_d_loss(self.nets, - self.lambda_reg, - x_real, - y_org, - y_trg, - z_trg=z_trg, - masks=masks) + d_loss, d_losses_latent = compute_d_loss( + self.nets, + self.lambda_reg, + x_real, + y_org, + y_trg, + z_trg=z_trg, + masks=masks) self._reset_grad(optimizers) d_loss.backward() optimizers['discriminator'].minimize(d_loss) - d_loss, d_losses_ref = compute_d_loss(self.nets, - self.lambda_reg, - x_real, - y_org, - y_trg, - x_ref=x_ref, - masks=masks) + d_loss, d_losses_ref = compute_d_loss( + self.nets, + self.lambda_reg, + x_real, + y_org, + y_trg, + x_ref=x_ref, + masks=masks) self._reset_grad(optimizers) d_loss.backward() optimizers['discriminator'].step() # train the generator - g_loss, g_losses_latent = compute_g_loss(self.nets, - self.w_hpf, - self.lambda_sty, - self.lambda_ds, - self.lambda_cyc, - x_real, - y_org, - y_trg, - z_trgs=[z_trg, z_trg2], - masks=masks) + g_loss, g_losses_latent = compute_g_loss( + self.nets, + self.w_hpf, + self.lambda_sty, + self.lambda_ds, + self.lambda_cyc, + x_real, + y_org, + y_trg, + z_trgs=[z_trg, z_trg2], + masks=masks) self._reset_grad(optimizers) g_loss.backward() optimizers['generator'].step() optimizers['mapping_network'].step() optimizers['style_encoder'].step() - g_loss, g_losses_ref = compute_g_loss(self.nets, - self.w_hpf, - self.lambda_sty, - self.lambda_ds, - self.lambda_cyc, - x_real, - y_org, - y_trg, - x_refs=[x_ref, x_ref2], - masks=masks) + g_loss, g_losses_ref = compute_g_loss( + self.nets, + self.w_hpf, + self.lambda_sty, + self.lambda_ds, + self.lambda_cyc, + x_real, + y_org, + y_trg, + x_refs=[x_ref, x_ref2], + masks=masks) self._reset_grad(optimizers) g_loss.backward() optimizers['generator'].step() # compute moving average of network parameters - soft_update(self.nets['generator'], - self.nets_ema['generator'], - beta=0.999) - soft_update(self.nets['mapping_network'], - self.nets_ema['mapping_network'], - beta=0.999) - soft_update(self.nets['style_encoder'], - self.nets_ema['style_encoder'], - beta=0.999) + soft_update( + self.nets['generator'], self.nets_ema['generator'], beta=0.999) + soft_update( + self.nets['mapping_network'], + self.nets_ema['mapping_network'], + beta=0.999) + soft_update( + self.nets['style_encoder'], + self.nets_ema['style_encoder'], + beta=0.999) # decay weight for diversity sensitive loss if self.lambda_ds > 0: @@ -357,15 +362,16 @@ def test_iter(self, metrics=None): #TODO self.nets_ema['generator'].eval() self.nets_ema['style_encoder'].eval() - soft_update(self.nets['generator'], - self.nets_ema['generator'], - beta=0.999) - soft_update(self.nets['mapping_network'], - self.nets_ema['mapping_network'], - beta=0.999) - soft_update(self.nets['style_encoder'], - self.nets_ema['style_encoder'], - beta=0.999) + soft_update( + self.nets['generator'], self.nets_ema['generator'], beta=0.999) + soft_update( + self.nets['mapping_network'], + self.nets_ema['mapping_network'], + beta=0.999) + soft_update( + self.nets['style_encoder'], + self.nets_ema['style_encoder'], + beta=0.999) src_img = self.input['src'] ref_img = self.input['ref'] 
ref_label = self.input['ref_cls'] diff --git a/paddlers/models/ppgan/models/styleganv2_model.py b/paddlers/models/ppgan/models/styleganv2_model.py index 71f33b95..d7b70158 100644 --- a/paddlers/models/ppgan/models/styleganv2_model.py +++ b/paddlers/models/ppgan/models/styleganv2_model.py @@ -25,7 +25,6 @@ from ..solver import build_lr_scheduler, build_optimizer - def r1_penalty(real_pred, real_img): """ R1 regularization for discriminator. The core idea is to @@ -40,27 +39,25 @@ def r1_penalty(real_pred, real_img): Eq. 9 in Which training methods for GANs do actually converge. """ - grad_real = paddle.grad(outputs=real_pred.sum(), - inputs=real_img, - create_graph=True)[0] + grad_real = paddle.grad( + outputs=real_pred.sum(), inputs=real_img, create_graph=True)[0] grad_penalty = (grad_real * grad_real).reshape([grad_real.shape[0], -1]).sum(1).mean() return grad_penalty def g_path_regularize(fake_img, latents, mean_path_length, decay=0.01): - noise = paddle.randn(fake_img.shape) / math.sqrt( - fake_img.shape[2] * fake_img.shape[3]) - grad = paddle.grad(outputs=(fake_img * noise).sum(), - inputs=latents, - create_graph=True)[0] + noise = paddle.randn(fake_img.shape) / math.sqrt(fake_img.shape[2] * + fake_img.shape[3]) + grad = paddle.grad( + outputs=(fake_img * noise).sum(), inputs=latents, create_graph=True)[0] path_lengths = paddle.sqrt((grad * grad).sum(2).mean(1)) path_mean = mean_path_length + decay * (path_lengths.mean() - mean_path_length) - path_penalty = ((path_lengths - path_mean) * - (path_lengths - path_mean)).mean() + path_penalty = ( + (path_lengths - path_mean) * (path_lengths - path_mean)).mean() return path_penalty, path_lengths.detach().mean(), path_mean.detach() @@ -72,6 +69,7 @@ class StyleGAN2Model(BaseModel): StyleGAN2 paper: https://arxiv.org/pdf/1912.04958.pdf """ + def __init__(self, generator, discriminator=None, @@ -95,11 +93,11 @@ def __init__(self, 'gen_iters', 4) self.disc_iters = 16 if self.params is None else self.params.get( 'disc_iters', 16) - self.disc_start_iters = (0 if self.params is None else self.params.get( - 'disc_start_iters', 0)) + self.disc_start_iters = (0 if self.params is None else + self.params.get('disc_start_iters', 0)) - self.visual_iters = (500 if self.params is None else self.params.get( - 'visual_iters', 500)) + self.visual_iters = (500 if self.params is None else + self.params.get('visual_iters', 500)) self.mixing_prob = mixing_prob self.num_style_feat = num_style_feat @@ -174,8 +172,8 @@ def model_ema(self, decay=0.999): net_g_ema_params = dict(neg_g_ema.named_parameters()) for k in net_g_ema_params.keys(): - net_g_ema_params[k].set_value(net_g_ema_params[k] * (decay) + - (net_g_params[k] * (1 - decay))) + net_g_ema_params[k].set_value(net_g_ema_params[k] * (decay) + ( + net_g_params[k] * (1 - decay))) def setup_input(self, input): """Unpack input data from the dataloader and perform necessary pre-processing steps. @@ -220,9 +218,9 @@ def train_iter(self, optimizers=None): real_pred = self.nets['disc'](self.real_img) # wgan loss with softplus (logistic loss) for discriminator l_d_total = 0. 
- l_d = self.gan_criterion(real_pred, True, - is_disc=True) + self.gan_criterion( - fake_pred, False, is_disc=True) + l_d = self.gan_criterion( + real_pred, True, is_disc=True) + self.gan_criterion( + fake_pred, False, is_disc=True) self.losses['l_d'] = l_d # In wgan, real_score should be positive and fake_score should be # negative @@ -235,8 +233,8 @@ def train_iter(self, optimizers=None): self.real_img.stop_gradient = False real_pred = self.nets['disc'](self.real_img) l_d_r1 = r1_penalty(real_pred, self.real_img) - l_d_r1 = (self.r1_reg_weight / 2 * l_d_r1 * self.disc_iters + - 0 * real_pred[0]) + l_d_r1 = (self.r1_reg_weight / 2 * l_d_r1 * self.disc_iters + 0 * + real_pred[0]) self.losses['l_d_r1'] = l_d_r1.detach().mean() @@ -265,8 +263,8 @@ def train_iter(self, optimizers=None): l_g_path, path_lengths, self.mean_path_length = g_path_regularize( fake_img, latents, self.mean_path_length) - l_g_path = (self.path_reg_weight * self.gen_iters * l_g_path + - 0 * fake_img[0, 0, 0, 0]) + l_g_path = (self.path_reg_weight * self.gen_iters * l_g_path + 0 * + fake_img[0, 0, 0, 0]) l_g_total += l_g_path self.losses['l_g_path'] = l_g_path.detach().mean() @@ -301,9 +299,10 @@ def set_generator(self, generator): def forward(self, style, truncation): truncation_latent = self.generator.get_mean_style() - out = self.generator(styles=style, - truncation=truncation, - truncation_latent=truncation_latent) + out = self.generator( + styles=style, + truncation=truncation, + truncation_latent=truncation_latent) return out[0] def export_model(self, @@ -316,6 +315,7 @@ def export_model(self, truncation = paddle.rand(shape=inputs_size[1], dtype='float32') if output_dir is None: output_dir = 'inference_model' - paddle.jit.save(infer_generator, - os.path.join(output_dir, "stylegan2model_gen"), - input_spec=[style, truncation]) + paddle.jit.save( + infer_generator, + os.path.join(output_dir, "stylegan2model_gen"), + input_spec=[style, truncation]) diff --git a/paddlers/models/ppgan/models/ugatit_model.py b/paddlers/models/ppgan/models/ugatit_model.py index 007dde19..d315a457 100644 --- a/paddlers/models/ppgan/models/ugatit_model.py +++ b/paddlers/models/ppgan/models/ugatit_model.py @@ -34,6 +34,7 @@ class UGATITModel(BaseModel): UGATIT paper: https://arxiv.org/pdf/1907.10830.pdf """ + def __init__(self, generator, discriminator_g=None, @@ -154,6 +155,7 @@ def test_iter(self, metrics=None): def train_iter(self, optimizers=None): """Calculate losses, gradients, and update network weights; called in every training iteration""" + def _criterion(loss_func, logit, is_real): if is_real: target = paddle.ones_like(logit) @@ -177,42 +179,42 @@ def _criterion(loss_func, logit, is_real): fake_GB_logit, fake_GB_cam_logit, _ = self.nets['disGB'](self.fake_A2B) fake_LB_logit, fake_LB_cam_logit, _ = self.nets['disLB'](self.fake_A2B) - D_ad_loss_GA = _criterion(self.MSE_loss, - real_GA_logit, True) + _criterion( - self.MSE_loss, fake_GA_logit, False) + D_ad_loss_GA = _criterion(self.MSE_loss, real_GA_logit, + True) + _criterion(self.MSE_loss, + fake_GA_logit, False) D_ad_cam_loss_GA = _criterion( self.MSE_loss, real_GA_cam_logit, True) + _criterion( self.MSE_loss, fake_GA_cam_logit, False) - D_ad_loss_LA = _criterion(self.MSE_loss, - real_LA_logit, True) + _criterion( - self.MSE_loss, fake_LA_logit, False) + D_ad_loss_LA = _criterion(self.MSE_loss, real_LA_logit, + True) + _criterion(self.MSE_loss, + fake_LA_logit, False) D_ad_cam_loss_LA = _criterion( self.MSE_loss, real_LA_cam_logit, True) + _criterion( self.MSE_loss, 
fake_LA_cam_logit, False) - D_ad_loss_GB = _criterion(self.MSE_loss, - real_GB_logit, True) + _criterion( - self.MSE_loss, fake_GB_logit, False) + D_ad_loss_GB = _criterion(self.MSE_loss, real_GB_logit, + True) + _criterion(self.MSE_loss, + fake_GB_logit, False) D_ad_cam_loss_GB = _criterion( self.MSE_loss, real_GB_cam_logit, True) + _criterion( self.MSE_loss, fake_GB_cam_logit, False) - D_ad_loss_LB = _criterion(self.MSE_loss, - real_LB_logit, True) + _criterion( - self.MSE_loss, fake_LB_logit, False) + D_ad_loss_LB = _criterion(self.MSE_loss, real_LB_logit, + True) + _criterion(self.MSE_loss, + fake_LB_logit, False) D_ad_cam_loss_LB = _criterion( self.MSE_loss, real_LB_cam_logit, True) + _criterion( self.MSE_loss, fake_LB_cam_logit, False) - D_loss_A = self.adv_weight * (D_ad_loss_GA + D_ad_cam_loss_GA + - D_ad_loss_LA + D_ad_cam_loss_LA) - D_loss_B = self.adv_weight * (D_ad_loss_GB + D_ad_cam_loss_GB + - D_ad_loss_LB + D_ad_cam_loss_LB) + D_loss_A = self.adv_weight * ( + D_ad_loss_GA + D_ad_cam_loss_GA + D_ad_loss_LA + D_ad_cam_loss_LA) + D_loss_B = self.adv_weight * ( + D_ad_loss_GB + D_ad_cam_loss_GB + D_ad_loss_LB + D_ad_cam_loss_LB) Discriminator_loss = D_loss_A + D_loss_B Discriminator_loss.backward() @@ -250,13 +252,13 @@ def _criterion(loss_func, logit, is_real): G_identity_loss_A = self.L1_loss(fake_A2A, self.real_A) G_identity_loss_B = self.L1_loss(fake_B2B, self.real_B) - G_cam_loss_A = _criterion(self.BCE_loss, - fake_B2A_cam_logit, True) + _criterion( - self.BCE_loss, fake_A2A_cam_logit, False) + G_cam_loss_A = _criterion(self.BCE_loss, fake_B2A_cam_logit, + True) + _criterion(self.BCE_loss, + fake_A2A_cam_logit, False) - G_cam_loss_B = _criterion(self.BCE_loss, - fake_A2B_cam_logit, True) + _criterion( - self.BCE_loss, fake_B2B_cam_logit, False) + G_cam_loss_B = _criterion(self.BCE_loss, fake_A2B_cam_logit, + True) + _criterion(self.BCE_loss, + fake_B2B_cam_logit, False) G_loss_A = self.adv_weight * ( G_ad_loss_GA + G_ad_cam_loss_GA + G_ad_loss_LA + G_ad_cam_loss_LA diff --git a/paddlers/models/ppgan/models/wav2lip_hq_model.py b/paddlers/models/ppgan/models/wav2lip_hq_model.py index 034e81f9..23d6065b 100644 --- a/paddlers/models/ppgan/models/wav2lip_hq_model.py +++ b/paddlers/models/ppgan/models/wav2lip_hq_model.py @@ -37,6 +37,7 @@ class Wav2LipModelHq(BaseModel): By default, it uses a '--netG Wav2lip' generator, a '--netD SyncNetColor' discriminator. 
""" + def __init__(self, generator, discriminator_sync=None, @@ -61,9 +62,8 @@ def __init__(self, self.eval_perceptual_losses = [] # define networks (both generator and discriminator) self.nets['netG'] = build_generator(generator) - init_weights(self.nets['netG'], - init_type='kaiming', - distribution='uniform') + init_weights( + self.nets['netG'], init_type='kaiming', distribution='uniform') if self.is_train: self.nets['netDS'] = build_discriminator(discriminator_sync) weights_path = get_weights_path_from_url(SYNCNET_WEIGHT_URL) @@ -71,9 +71,8 @@ def __init__(self, self.nets['netDS'].load_dict(params) self.nets['netDH'] = build_discriminator(discriminator_hq) - init_weights(self.nets['netDH'], - init_type='kaiming', - distribution='uniform') + init_weights( + self.nets['netDH'], init_type='kaiming', distribution='uniform') if self.is_train: self.recon_loss = paddle.nn.L1Loss() @@ -103,8 +102,9 @@ def backward_G(self): self.l1_loss = self.recon_loss(self.g, self.y) if self.disc_wt > 0.: - if isinstance(self.nets['netDH'], paddle.DataParallel - ): #paddle.fluid.dygraph.parallel.DataParallel) + if isinstance( + self.nets['netDH'], paddle. + DataParallel): #paddle.fluid.dygraph.parallel.DataParallel) self.perceptual_loss = self.nets[ 'netDH']._layers.perceptual_forward(self.g) else: @@ -163,10 +163,12 @@ def test_iter(self, metrics=None): pred_real = self.nets['netDH'](self.y) pred_fake = self.nets['netDH'](self.g) - disc_real_loss = F.binary_cross_entropy( - pred_real, paddle.ones((len(pred_real), 1))) - disc_fake_loss = F.binary_cross_entropy( - pred_fake, paddle.zeros((len(pred_fake), 1))) + disc_real_loss = F.binary_cross_entropy(pred_real, + paddle.ones( + (len(pred_real), 1))) + disc_fake_loss = F.binary_cross_entropy(pred_fake, + paddle.zeros( + (len(pred_fake), 1))) self.eval_disc_fake_losses.append(disc_fake_loss.numpy().item()) self.eval_disc_real_losses.append(disc_real_loss.numpy().item()) @@ -178,8 +180,8 @@ def test_iter(self, metrics=None): if isinstance(self.nets['netDH'], paddle.DataParallel ): #paddle.fluid.dygraph.parallel.DataParallel) perceptual_loss = self.nets[ - 'netDH']._layers.perceptual_forward( - self.g).numpy().item() + 'netDH']._layers.perceptual_forward(self.g).numpy( + ).item() else: perceptual_loss = self.nets['netDH'].perceptual_forward( self.g).numpy().item() @@ -201,11 +203,10 @@ def test_iter(self, metrics=None): if averaged_sync_loss < .75: self.syncnet_wt = 0.01 - print( - 'L1: {}, Sync loss: {}, Percep: {}, Fake: {}, Real: {}'.format( - averaged_recon_loss, averaged_sync_loss, - averaged_perceptual_loss, averaged_disc_fake_loss, - averaged_disc_real_loss)) + print('L1: {}, Sync loss: {}, Percep: {}, Fake: {}, Real: {}'. + format(averaged_recon_loss, averaged_sync_loss, + averaged_perceptual_loss, averaged_disc_fake_loss, + averaged_disc_real_loss)) self.eval_sync_losses, self.eval_recon_losses = [], [] self.eval_disc_real_losses, self.eval_disc_fake_losses = [], [] self.eval_perceptual_losses = [] diff --git a/paddlers/models/ppgan/models/wav2lip_model.py b/paddlers/models/ppgan/models/wav2lip_model.py index 852d25be..2a4d2b05 100644 --- a/paddlers/models/ppgan/models/wav2lip_model.py +++ b/paddlers/models/ppgan/models/wav2lip_model.py @@ -54,6 +54,7 @@ class Wav2LipModel(BaseModel): By default, it uses a '--netG Wav2lip' generator, a '--netD SyncNetColor' discriminator. 
""" + def __init__(self, generator, discriminator=None, @@ -110,8 +111,8 @@ def backward_G(self): self.losses['sync_loss'] = self.sync_loss self.losses['l1_loss'] = self.l1_loss - self.loss_G = self.syncnet_wt * self.sync_loss + ( - 1 - self.syncnet_wt) * self.l1_loss + self.loss_G = self.syncnet_wt * self.sync_loss + (1 - self.syncnet_wt + ) * self.l1_loss self.loss_G.backward() def train_iter(self, optimizers=None): diff --git a/paddlers/models/ppgan/modules/caffevgg.py b/paddlers/models/ppgan/modules/caffevgg.py index b0780899..af16b738 100644 --- a/paddlers/models/ppgan/modules/caffevgg.py +++ b/paddlers/models/ppgan/modules/caffevgg.py @@ -15,14 +15,13 @@ class CaffeVGG19(nn.Layer): 'M', 512, 512, 512, 512, 'M' ] - def __init__(self, output_index: int = 26) -> None: + def __init__(self, output_index: int=26) -> None: super().__init__() arch = 'caffevgg19' weights_path = get_path_from_url(model_urls[arch][0], model_urls[arch][1]) - data_dict: dict = np.load(weights_path, - encoding='latin1', - allow_pickle=True).item() + data_dict: dict = np.load( + weights_path, encoding='latin1', allow_pickle=True).item() self.features = self.make_layers(self.cfg, data_dict) del data_dict self.features = nn.Sequential(*self.features.sublayers()[:output_index]) diff --git a/paddlers/models/ppgan/modules/dense_motion.py b/paddlers/models/ppgan/modules/dense_motion.py index 131d6f4b..6217c516 100644 --- a/paddlers/models/ppgan/modules/dense_motion.py +++ b/paddlers/models/ppgan/modules/dense_motion.py @@ -13,6 +13,7 @@ class DenseMotionNetwork(nn.Layer): """ Module that predicting a dense motion from sparse motion representation given by kp_source and kp_driving """ + def __init__(self, block_expansion, num_blocks, @@ -24,64 +25,71 @@ def __init__(self, kp_variance=0.01, mobile_net=False): super(DenseMotionNetwork, self).__init__() - self.hourglass = Hourglass(block_expansion=block_expansion, - in_features=(num_kp + 1) * - (num_channels + 1), - max_features=max_features, - num_blocks=num_blocks, - mobile_net=mobile_net) + self.hourglass = Hourglass( + block_expansion=block_expansion, + in_features=(num_kp + 1) * (num_channels + 1), + max_features=max_features, + num_blocks=num_blocks, + mobile_net=mobile_net) if mobile_net: self.mask = nn.Sequential( - nn.Conv2D(self.hourglass.out_filters, - self.hourglass.out_filters, - kernel_size=3, - weight_attr=nn.initializer.KaimingUniform(), - padding=1), - nn.ReLU(), - nn.Conv2D(self.hourglass.out_filters, - self.hourglass.out_filters, - kernel_size=3, - weight_attr=nn.initializer.KaimingUniform(), - padding=1), - nn.ReLU(), - nn.Conv2D(self.hourglass.out_filters, - num_kp + 1, - kernel_size=3, - weight_attr=nn.initializer.KaimingUniform(), - padding=1)) + nn.Conv2D( + self.hourglass.out_filters, + self.hourglass.out_filters, + kernel_size=3, + weight_attr=nn.initializer.KaimingUniform(), + padding=1), + nn.ReLU(), + nn.Conv2D( + self.hourglass.out_filters, + self.hourglass.out_filters, + kernel_size=3, + weight_attr=nn.initializer.KaimingUniform(), + padding=1), + nn.ReLU(), + nn.Conv2D( + self.hourglass.out_filters, + num_kp + 1, + kernel_size=3, + weight_attr=nn.initializer.KaimingUniform(), + padding=1)) else: - self.mask = nn.Conv2D(self.hourglass.out_filters, - num_kp + 1, - kernel_size=(7, 7), - padding=(3, 3)) + self.mask = nn.Conv2D( + self.hourglass.out_filters, + num_kp + 1, + kernel_size=(7, 7), + padding=(3, 3)) if estimate_occlusion_map: if mobile_net: - self.occlusion = nn.Sequential( - nn.Conv2D(self.hourglass.out_filters, - 
self.hourglass.out_filters, - kernel_size=3, - padding=1, - weight_attr=nn.initializer.KaimingUniform()), + self.occlusion = nn.Sequential( + nn.Conv2D( + self.hourglass.out_filters, + self.hourglass.out_filters, + kernel_size=3, + padding=1, + weight_attr=nn.initializer.KaimingUniform()), nn.ReLU(), - nn.Conv2D(self.hourglass.out_filters, - self.hourglass.out_filters, - kernel_size=3, - weight_attr=nn.initializer.KaimingUniform(), - padding=1), + nn.Conv2D( + self.hourglass.out_filters, + self.hourglass.out_filters, + kernel_size=3, + weight_attr=nn.initializer.KaimingUniform(), + padding=1), nn.ReLU(), - nn.Conv2D(self.hourglass.out_filters, - 1, - kernel_size=3, - padding=1, - weight_attr=nn.initializer.KaimingUniform()) - ) + nn.Conv2D( + self.hourglass.out_filters, + 1, + kernel_size=3, + padding=1, + weight_attr=nn.initializer.KaimingUniform())) else: - self.occlusion = nn.Conv2D(self.hourglass.out_filters, - 1, - kernel_size=(7, 7), - padding=(3, 3)) + self.occlusion = nn.Conv2D( + self.hourglass.out_filters, + 1, + kernel_size=(7, 7), + padding=(3, 3)) else: self.occlusion = None @@ -90,9 +98,8 @@ def __init__(self, self.kp_variance = kp_variance if self.scale_factor != 1: - self.down = AntiAliasInterpolation2d(num_channels, - self.scale_factor, - mobile_net=mobile_net) + self.down = AntiAliasInterpolation2d( + num_channels, self.scale_factor, mobile_net=mobile_net) def create_heatmap_representations(self, source_image, kp_driving, kp_source): @@ -100,12 +107,10 @@ def create_heatmap_representations(self, source_image, kp_driving, Eq 6. in the paper H_k(z) """ spatial_size = source_image.shape[2:] - gaussian_driving = kp2gaussian(kp_driving, - spatial_size=spatial_size, - kp_variance=self.kp_variance) - gaussian_source = kp2gaussian(kp_source, - spatial_size=spatial_size, - kp_variance=self.kp_variance) + gaussian_driving = kp2gaussian( + kp_driving, spatial_size=spatial_size, kp_variance=self.kp_variance) + gaussian_source = kp2gaussian( + kp_source, spatial_size=spatial_size, kp_variance=self.kp_variance) heatmap = gaussian_driving - gaussian_source #adding background feature @@ -121,8 +126,8 @@ def create_sparse_motions(self, source_image, kp_driving, kp_source): Eq 4. 
in the paper T_{s<-d}(z) """ bs, _, h, w = source_image.shape - identity_grid = make_coordinate_grid((h, w), - type=kp_source['value'].dtype) + identity_grid = make_coordinate_grid( + (h, w), type=kp_source['value'].dtype) identity_grid = identity_grid.reshape([1, 1, h, w, 2]) coordinate_grid = identity_grid - kp_driving['value'].reshape( [bs, self.num_kp, 1, 1, 2]) @@ -146,8 +151,8 @@ def create_sparse_motions(self, source_image, kp_driving, kp_source): #adding background feature identity_grid = paddle.tile(identity_grid, (bs, 1, 1, 1, 1)) - sparse_motions = paddle.concat([identity_grid, driving_to_source], - axis=1) + sparse_motions = paddle.concat( + [identity_grid, driving_to_source], axis=1) return sparse_motions def create_deformed_source_image(self, source_image, sparse_motions): @@ -157,17 +162,18 @@ def create_deformed_source_image(self, source_image, sparse_motions): bs, _, h, w = source_image.shape source_repeat = paddle.tile( source_image.unsqueeze(1).unsqueeze(1), - [1, self.num_kp + 1, 1, 1, 1, 1 - ]) #.repeat(1, self.num_kp + 1, 1, 1, 1, 1) + [1, self.num_kp + 1, 1, 1, 1, + 1]) #.repeat(1, self.num_kp + 1, 1, 1, 1, 1) source_repeat = source_repeat.reshape( [bs * (self.num_kp + 1), -1, h, w]) sparse_motions = sparse_motions.reshape( (bs * (self.num_kp + 1), h, w, -1)) - sparse_deformed = F.grid_sample(source_repeat, - sparse_motions, - mode='bilinear', - padding_mode='zeros', - align_corners=True) + sparse_deformed = F.grid_sample( + source_repeat, + sparse_motions, + mode='bilinear', + padding_mode='zeros', + align_corners=True) sparse_deformed = sparse_deformed.reshape( (bs, self.num_kp + 1, -1, h, w)) return sparse_deformed @@ -183,8 +189,8 @@ def forward(self, source_image, kp_driving, kp_source): source_image, kp_driving, kp_source) sparse_motion = self.create_sparse_motions(source_image, kp_driving, kp_source) - deformed_source = self.create_deformed_source_image( - source_image, sparse_motion) + deformed_source = self.create_deformed_source_image(source_image, + sparse_motion) out_dict['sparse_deformed'] = deformed_source temp = paddle.concat([heatmap_representation, deformed_source], axis=2) diff --git a/paddlers/models/ppgan/modules/first_order.py b/paddlers/models/ppgan/modules/first_order.py index b6b113bc..e551bc53 100644 --- a/paddlers/models/ppgan/modules/first_order.py +++ b/paddlers/models/ppgan/modules/first_order.py @@ -18,6 +18,7 @@ class ImagePyramide(nn.Layer): """ Create image pyramide for computing pyramide perceptual loss. See Sec 3.3 """ + def __init__(self, scales, num_channels): super(ImagePyramide, self).__init__() self.downs = paddle.nn.LayerList() @@ -31,8 +32,8 @@ def __init__(self, scales, num_channels): def forward(self, x): out_dict = {} for scale, down_module in zip(self.name_list, self.downs): - out_dict['prediction_' + - str(scale).replace('-', '.')] = down_module(x) + out_dict['prediction_' + str(scale).replace('-', + '.')] = down_module(x) return out_dict @@ -87,16 +88,19 @@ class ResBlock2d(nn.Layer): """ Res block, preserve spatial resolution. 
""" + def __init__(self, in_features, kernel_size, padding): super(ResBlock2d, self).__init__() - self.conv1 = nn.Conv2D(in_channels=in_features, - out_channels=in_features, - kernel_size=kernel_size, - padding=padding) - self.conv2 = nn.Conv2D(in_channels=in_features, - out_channels=in_features, - kernel_size=kernel_size, - padding=padding) + self.conv1 = nn.Conv2D( + in_channels=in_features, + out_channels=in_features, + kernel_size=kernel_size, + padding=padding) + self.conv2 = nn.Conv2D( + in_channels=in_features, + out_channels=in_features, + kernel_size=kernel_size, + padding=padding) self.norm1 = SyncBatchNorm(in_features) self.norm2 = SyncBatchNorm(in_features) @@ -115,25 +119,29 @@ class MobileResBlock2d(nn.Layer): """ Res block, preserve spatial resolution. """ + def __init__(self, in_features, kernel_size, padding): super(MobileResBlock2d, self).__init__() out_features = in_features * 2 - self.conv_pw = nn.Conv2D(in_channels=in_features, - out_channels=out_features, - kernel_size=1, - padding=0, - bias_attr=False) - self.conv_dw = nn.Conv2D(in_channels=out_features, - out_channels=out_features, - kernel_size=kernel_size, - padding=padding, - groups=out_features, - bias_attr=False) - self.conv_pw_linear = nn.Conv2D(in_channels=out_features, - out_channels=in_features, - kernel_size=1, - padding=0, - bias_attr=False) + self.conv_pw = nn.Conv2D( + in_channels=in_features, + out_channels=out_features, + kernel_size=1, + padding=0, + bias_attr=False) + self.conv_dw = nn.Conv2D( + in_channels=out_features, + out_channels=out_features, + kernel_size=kernel_size, + padding=padding, + groups=out_features, + bias_attr=False) + self.conv_pw_linear = nn.Conv2D( + in_channels=out_features, + out_channels=in_features, + kernel_size=1, + padding=0, + bias_attr=False) self.norm1 = SyncBatchNorm(in_features) self.norm_pw = SyncBatchNorm(out_features) self.norm_dw = SyncBatchNorm(out_features) @@ -159,6 +167,7 @@ class UpBlock2d(nn.Layer): """ Upsampling block for use in decoder. """ + def __init__(self, in_features, out_features, @@ -167,11 +176,12 @@ def __init__(self, groups=1): super(UpBlock2d, self).__init__() - self.conv = nn.Conv2D(in_channels=in_features, - out_channels=out_features, - kernel_size=kernel_size, - padding=padding, - groups=groups) + self.conv = nn.Conv2D( + in_channels=in_features, + out_channels=out_features, + kernel_size=kernel_size, + padding=padding, + groups=groups) self.norm = SyncBatchNorm(out_features) def forward(self, x): @@ -186,6 +196,7 @@ class MobileUpBlock2d(nn.Layer): """ Upsampling block for use in decoder. """ + def __init__(self, in_features, out_features, @@ -194,17 +205,19 @@ def __init__(self, groups=1): super(MobileUpBlock2d, self).__init__() - self.conv = nn.Conv2D(in_channels=in_features, - out_channels=in_features, - kernel_size=kernel_size, - padding=padding, - groups=in_features, - bias_attr=False) - self.conv1 = nn.Conv2D(in_channels=in_features, - out_channels=out_features, - kernel_size=1, - padding=0, - bias_attr=False) + self.conv = nn.Conv2D( + in_channels=in_features, + out_channels=in_features, + kernel_size=kernel_size, + padding=padding, + groups=in_features, + bias_attr=False) + self.conv1 = nn.Conv2D( + in_channels=in_features, + out_channels=out_features, + kernel_size=1, + padding=0, + bias_attr=False) self.norm = SyncBatchNorm(in_features) self.norm1 = SyncBatchNorm(out_features) @@ -223,6 +236,7 @@ class DownBlock2d(nn.Layer): """ Downsampling block for use in encoder. 
""" + def __init__(self, in_features, out_features, @@ -230,11 +244,12 @@ def __init__(self, padding=1, groups=1): super(DownBlock2d, self).__init__() - self.conv = nn.Conv2D(in_channels=in_features, - out_channels=out_features, - kernel_size=kernel_size, - padding=padding, - groups=groups) + self.conv = nn.Conv2D( + in_channels=in_features, + out_channels=out_features, + kernel_size=kernel_size, + padding=padding, + groups=groups) self.norm = SyncBatchNorm(out_features) self.pool = nn.AvgPool2D(kernel_size=(2, 2)) @@ -250,6 +265,7 @@ class MobileDownBlock2d(nn.Layer): """ Downsampling block for use in encoder. """ + def __init__(self, in_features, out_features, @@ -257,21 +273,23 @@ def __init__(self, padding=1, groups=1): super(MobileDownBlock2d, self).__init__() - self.conv = nn.Conv2D(in_channels=in_features, - out_channels=in_features, - kernel_size=kernel_size, - padding=padding, - groups=in_features, - bias_attr=False) + self.conv = nn.Conv2D( + in_channels=in_features, + out_channels=in_features, + kernel_size=kernel_size, + padding=padding, + groups=in_features, + bias_attr=False) self.norm = SyncBatchNorm(in_features) self.pool = nn.AvgPool2D(kernel_size=(2, 2)) - self.conv1 = nn.Conv2D(in_features, - out_features, - kernel_size=1, - padding=0, - stride=1, - bias_attr=False) + self.conv1 = nn.Conv2D( + in_features, + out_features, + kernel_size=1, + padding=0, + stride=1, + bias_attr=False) self.norm1 = SyncBatchNorm(out_features) def forward(self, x): @@ -289,6 +307,7 @@ class SameBlock2d(nn.Layer): """ Simple block, preserve spatial resolution. """ + def __init__(self, in_features, out_features, @@ -297,13 +316,14 @@ def __init__(self, padding=1, mobile_net=False): super(SameBlock2d, self).__init__() - self.conv = nn.Conv2D(in_channels=in_features, - out_channels=out_features, - kernel_size=kernel_size, - padding=padding, - groups=groups, - bias_attr=(mobile_net == False), - weight_attr=nn.initializer.KaimingUniform()) + self.conv = nn.Conv2D( + in_channels=in_features, + out_channels=out_features, + kernel_size=kernel_size, + padding=padding, + groups=groups, + bias_attr=(mobile_net == False), + weight_attr=nn.initializer.KaimingUniform()) self.norm = SyncBatchNorm(out_features) def forward(self, x): @@ -317,6 +337,7 @@ class Encoder(nn.Layer): """ Hourglass Encoder """ + def __init__(self, block_expansion, in_features, @@ -329,20 +350,20 @@ def __init__(self, for i in range(num_blocks): if mobile_net: down_blocks.append( - MobileDownBlock2d(in_features if i == 0 else min( - max_features, block_expansion * (2**i)), - min(max_features, - block_expansion * (2**(i + 1))), - kernel_size=3, - padding=1)) + MobileDownBlock2d( + in_features if i == 0 else min( + max_features, block_expansion * (2**i)), + min(max_features, block_expansion * (2**(i + 1))), + kernel_size=3, + padding=1)) else: down_blocks.append( - DownBlock2d(in_features if i == 0 else min( - max_features, block_expansion * (2**i)), - min(max_features, - block_expansion * (2**(i + 1))), - kernel_size=3, - padding=1)) + DownBlock2d( + in_features if i == 0 else min( + max_features, block_expansion * (2**i)), + min(max_features, block_expansion * (2**(i + 1))), + kernel_size=3, + padding=1)) self.down_blocks = nn.LayerList(down_blocks) def forward(self, x): @@ -356,6 +377,7 @@ class Decoder(nn.Layer): """ Hourglass Decoder """ + def __init__(self, block_expansion, in_features, @@ -372,16 +394,14 @@ def __init__(self, in_filters = (1 if i == num_blocks - 1 else 2) * min( max_features, block_expansion * (2**(i + 1))) 
up_blocks.append( - MobileUpBlock2d(in_filters, - out_filters, - kernel_size=3, - padding=1)) + MobileUpBlock2d( + in_filters, out_filters, kernel_size=3, padding=1)) else: in_filters = (1 if i == num_blocks - 1 else 2) * min( max_features, block_expansion * (2**(i + 1))) up_blocks.append( - UpBlock2d(in_filters, out_filters, kernel_size=3, - padding=1)) + UpBlock2d( + in_filters, out_filters, kernel_size=3, padding=1)) self.up_blocks = nn.LayerList(up_blocks) self.out_filters = block_expansion + in_features @@ -399,6 +419,7 @@ class Hourglass(nn.Layer): """ Hourglass architecture. """ + def __init__(self, block_expansion, in_features, @@ -406,16 +427,18 @@ def __init__(self, max_features=256, mobile_net=False): super(Hourglass, self).__init__() - self.encoder = Encoder(block_expansion, - in_features, - num_blocks, - max_features, - mobile_net=mobile_net) - self.decoder = Decoder(block_expansion, - in_features, - num_blocks, - max_features, - mobile_net=mobile_net) + self.encoder = Encoder( + block_expansion, + in_features, + num_blocks, + max_features, + mobile_net=mobile_net) + self.decoder = Decoder( + block_expansion, + in_features, + num_blocks, + max_features, + mobile_net=mobile_net) self.out_filters = self.decoder.out_filters def forward(self, x): @@ -426,6 +449,7 @@ class AntiAliasInterpolation2d(nn.Layer): """ Band-limited downsampling, for better preservation of the input signal. """ + def __init__(self, channels, scale, mobile_net=False): super(AntiAliasInterpolation2d, self).__init__() if mobile_net: @@ -443,7 +467,8 @@ def __init__(self, channels, scale, mobile_net=False): # gaussian function of each dimension. kernel = 1 meshgrids = paddle.meshgrid( - [paddle.arange(size, dtype='float32') for size in kernel_size]) + [paddle.arange( + size, dtype='float32') for size in kernel_size]) for size, std, mgrid in zip(kernel_size, sigma, meshgrids): mean = (size - 1) / 2 kernel *= paddle.exp(-(mgrid - mean)**2 / (2 * std**2 + 1e-9)) @@ -452,7 +477,7 @@ def __init__(self, channels, scale, mobile_net=False): kernel = kernel / paddle.sum(kernel) # Reshape to depthwise convolutional weight kernel = kernel.reshape([1, 1, *kernel.shape]) - kernel = paddle.tile(kernel, [channels, *[1] * (kernel.dim() - 1)]) + kernel = paddle.tile(kernel, [channels, * [1] * (kernel.dim() - 1)]) self.register_buffer('weight', kernel) self.groups = channels diff --git a/paddlers/models/ppgan/modules/init.py b/paddlers/models/ppgan/modules/init.py index 12784ce1..47fa4065 100644 --- a/paddlers/models/ppgan/modules/init.py +++ b/paddlers/models/ppgan/modules/init.py @@ -74,8 +74,8 @@ def calculate_gain(nonlinearity, param=None): # True/False are instances of int, hence check above negative_slope = param else: - raise ValueError( - "negative_slope {} not a valid number".format(param)) + raise ValueError("negative_slope {} not a valid number".format( + param)) return math.sqrt(2.0 / (1 + negative_slope**2)) else: raise ValueError("Unsupported nonlinearity {}".format(nonlinearity)) @@ -259,19 +259,15 @@ def kaiming_init(layer, distribution='normal'): assert distribution in ['uniform', 'normal'] if distribution == 'uniform': - kaiming_uniform_(layer.weight, - a=a, - mode=mode, - nonlinearity=nonlinearity) + kaiming_uniform_( + layer.weight, a=a, mode=mode, nonlinearity=nonlinearity) else: kaiming_normal_(layer.weight, a=a, mode=mode, nonlinearity=nonlinearity) if hasattr(layer, 'bias') and layer.bias is not None: constant_(layer.bias, bias) -def init_weights(net, - init_type='normal', - init_gain=0.02, +def 
init_weights(net, init_type='normal', init_gain=0.02, distribution='normal'): """Initialize network weights. Args: @@ -281,10 +277,11 @@ def init_weights(net, We use 'normal' in the original pix2pix and CycleGAN paper. But xavier and kaiming might work better for some applications. Feel free to try yourself. """ + def init_func(m): # define the initialization function classname = m.__class__.__name__ - if hasattr(m, 'weight') and (classname.find('Conv') != -1 - or classname.find('Linear') != -1): + if hasattr(m, 'weight') and (classname.find('Conv') != -1 or + classname.find('Linear') != -1): if init_type == 'normal': normal_(m.weight, 0.0, init_gain) elif init_type == 'xavier': diff --git a/paddlers/models/ppgan/modules/keypoint_detector.py b/paddlers/models/ppgan/modules/keypoint_detector.py index 809e64d4..f609633d 100644 --- a/paddlers/models/ppgan/modules/keypoint_detector.py +++ b/paddlers/models/ppgan/modules/keypoint_detector.py @@ -12,6 +12,7 @@ class KPDetector(nn.Layer): """ Detecting a keypoints. Return keypoint position and jacobian near each keypoint. """ + def __init__(self, block_expansion, num_kp, @@ -26,79 +27,90 @@ def __init__(self, mobile_net=False): super(KPDetector, self).__init__() - self.predictor = Hourglass(block_expansion, - in_features=num_channels, - max_features=max_features, - num_blocks=num_blocks, - mobile_net=mobile_net) + self.predictor = Hourglass( + block_expansion, + in_features=num_channels, + max_features=max_features, + num_blocks=num_blocks, + mobile_net=mobile_net) if mobile_net: self.kp = nn.Sequential( - nn.Conv2D(in_channels=self.predictor.out_filters, - out_channels=self.predictor.out_filters, - kernel_size=3, - weight_attr=nn.initializer.KaimingUniform(), - padding=pad), - nn.Conv2D(in_channels=self.predictor.out_filters, - out_channels=self.predictor.out_filters, - kernel_size=3, - weight_attr=nn.initializer.KaimingUniform(), - padding=pad), - nn.Conv2D(in_channels=self.predictor.out_filters, - out_channels=num_kp, - kernel_size=3, - weight_attr=nn.initializer.KaimingUniform(), - padding=pad)) + nn.Conv2D( + in_channels=self.predictor.out_filters, + out_channels=self.predictor.out_filters, + kernel_size=3, + weight_attr=nn.initializer.KaimingUniform(), + padding=pad), + nn.Conv2D( + in_channels=self.predictor.out_filters, + out_channels=self.predictor.out_filters, + kernel_size=3, + weight_attr=nn.initializer.KaimingUniform(), + padding=pad), + nn.Conv2D( + in_channels=self.predictor.out_filters, + out_channels=num_kp, + kernel_size=3, + weight_attr=nn.initializer.KaimingUniform(), + padding=pad)) else: - self.kp = nn.Conv2D(in_channels=self.predictor.out_filters, - out_channels=num_kp, - kernel_size=(7, 7), - padding=pad) + self.kp = nn.Conv2D( + in_channels=self.predictor.out_filters, + out_channels=num_kp, + kernel_size=(7, 7), + padding=pad) if estimate_jacobian: self.num_jacobian_maps = 1 if single_jacobian_map else num_kp if mobile_net: self.jacobian = nn.Sequential( - nn.Conv2D(in_channels=self.predictor.out_filters, + nn.Conv2D( + in_channels=self.predictor.out_filters, out_channels=self.predictor.out_filters, kernel_size=3, padding=pad), - nn.Conv2D(in_channels=self.predictor.out_filters, - out_channels=self.predictor.out_filters, - kernel_size=3, - padding=pad), - nn.Conv2D(in_channels=self.predictor.out_filters, - out_channels=4 * self.num_jacobian_maps, - kernel_size=3, - padding=pad), - ) + nn.Conv2D( + in_channels=self.predictor.out_filters, + out_channels=self.predictor.out_filters, + kernel_size=3, + padding=pad), + 
nn.Conv2D( + in_channels=self.predictor.out_filters, + out_channels=4 * self.num_jacobian_maps, + kernel_size=3, + padding=pad), ) self.jacobian[0].weight.set_value( - paddle.zeros(self.jacobian[0].weight.shape, dtype='float32')) + paddle.zeros( + self.jacobian[0].weight.shape, dtype='float32')) self.jacobian[1].weight.set_value( - paddle.zeros(self.jacobian[1].weight.shape, dtype='float32')) + paddle.zeros( + self.jacobian[1].weight.shape, dtype='float32')) self.jacobian[2].weight.set_value( - paddle.zeros(self.jacobian[2].weight.shape, dtype='float32')) + paddle.zeros( + self.jacobian[2].weight.shape, dtype='float32')) self.jacobian[2].bias.set_value( - paddle.to_tensor([1, 0, 0, 1] * - self.num_jacobian_maps).astype('float32')) + paddle.to_tensor([1, 0, 0, 1] * self.num_jacobian_maps) + .astype('float32')) else: - self.jacobian = nn.Conv2D(in_channels=self.predictor.out_filters, - out_channels=4 * self.num_jacobian_maps, - kernel_size=(7, 7), - padding=pad) + self.jacobian = nn.Conv2D( + in_channels=self.predictor.out_filters, + out_channels=4 * self.num_jacobian_maps, + kernel_size=(7, 7), + padding=pad) self.jacobian.weight.set_value( - paddle.zeros(self.jacobian.weight.shape, dtype='float32')) + paddle.zeros( + self.jacobian.weight.shape, dtype='float32')) self.jacobian.bias.set_value( - paddle.to_tensor([1, 0, 0, 1] * - self.num_jacobian_maps).astype('float32')) + paddle.to_tensor([1, 0, 0, 1] * self.num_jacobian_maps) + .astype('float32')) else: self.jacobian = None self.temperature = temperature self.scale_factor = scale_factor if self.scale_factor != 1: - self.down = AntiAliasInterpolation2d(num_channels, - self.scale_factor, - mobile_net=mobile_net) + self.down = AntiAliasInterpolation2d( + num_channels, self.scale_factor, mobile_net=mobile_net) def gaussian2kp(self, heatmap): """ diff --git a/paddlers/models/ppgan/modules/norm.py b/paddlers/models/ppgan/modules/norm.py index 2cc3e994..f584ac23 100644 --- a/paddlers/models/ppgan/modules/norm.py +++ b/paddlers/models/ppgan/modules/norm.py @@ -47,9 +47,10 @@ def build_norm_layer(norm_type='instance'): initializer=nn.initializer.Constant(1.0), learning_rate=0.0, trainable=False), - bias_attr=paddle.ParamAttr(initializer=nn.initializer.Constant(0.0), - learning_rate=0.0, - trainable=False)) + bias_attr=paddle.ParamAttr( + initializer=nn.initializer.Constant(0.0), + learning_rate=0.0, + trainable=False)) elif norm_type == 'spectral': norm_layer = functools.partial(Spectralnorm) elif norm_type == 'none': diff --git a/paddlers/models/ppgan/modules/wing.py b/paddlers/models/ppgan/modules/wing.py index 4cdc1826..a4513e84 100644 --- a/paddlers/models/ppgan/modules/wing.py +++ b/paddlers/models/ppgan/modules/wing.py @@ -23,16 +23,17 @@ def __init__(self, num_modules, depth, num_features, first_one=False): self.num_modules = num_modules self.depth = depth self.features = num_features - self.coordconv = CoordConvTh(64, - 64, - True, - True, - 256, - first_one, - out_channels=256, - kernel_size=1, - stride=1, - padding=0) + self.coordconv = CoordConvTh( + 64, + 64, + True, + True, + 256, + first_one, + out_channels=256, + kernel_size=1, + stride=1, + padding=0) self._generate_network(self.depth) def _generate_network(self, level): @@ -79,13 +80,13 @@ def __init__(self, height=64, width=64, with_r=False, with_boundary=False): (height, width)).astype('float32') x_coords = (x_coords / (height - 1)) * 2 - 1 y_coords = (y_coords / (width - 1)) * 2 - 1 - coords = paddle.stack([x_coords, y_coords], - axis=0) # (2, height, width) + coords = 
paddle.stack( + [x_coords, y_coords], axis=0) # (2, height, width) if self.with_r: rr = paddle.sqrt( - paddle.pow(x_coords, 2) + - paddle.pow(y_coords, 2)) # (height, width) + paddle.pow(x_coords, 2) + paddle.pow(y_coords, + 2)) # (height, width) rr = (rr / paddle.max(rr)).unsqueeze(0) coords = paddle.concat([coords, rr], axis=0) @@ -115,6 +116,7 @@ def forward(self, x, heatmap=None): class CoordConvTh(nn.Layer): """CoordConv layer as in the paper.""" + def __init__(self, height, width, @@ -144,12 +146,13 @@ class ConvBlock(nn.Layer): def __init__(self, in_planes, out_planes): super(ConvBlock, self).__init__() self.bn1 = nn.BatchNorm2D(in_planes) - conv3x3 = partial(nn.Conv2D, - kernel_size=3, - stride=1, - padding=1, - bias_attr=False, - dilation=1) + conv3x3 = partial( + nn.Conv2D, + kernel_size=3, + stride=1, + padding=1, + bias_attr=False, + dilation=1) self.conv1 = conv3x3(in_planes, int(out_planes / 2)) self.bn2 = nn.BatchNorm2D(int(out_planes / 2)) self.conv2 = conv3x3(int(out_planes / 2), int(out_planes / 4)) @@ -159,8 +162,10 @@ def __init__(self, in_planes, out_planes): self.downsample = None if in_planes != out_planes: self.downsample = nn.Sequential( - nn.BatchNorm2D(in_planes), nn.ReLU(True), - nn.Conv2D(in_planes, out_planes, 1, 1, bias_attr=False)) + nn.BatchNorm2D(in_planes), + nn.ReLU(True), + nn.Conv2D( + in_planes, out_planes, 1, 1, bias_attr=False)) def forward(self, x): residual = x @@ -235,16 +240,17 @@ def shift(x, N): IDXPAIR = namedtuple('IDXPAIR', 'start end') -index_map = Munch(chin=IDXPAIR(0 + 8, 33 - 8), - eyebrows=IDXPAIR(33, 51), - eyebrowsedges=IDXPAIR(33, 46), - nose=IDXPAIR(51, 55), - nostrils=IDXPAIR(55, 60), - eyes=IDXPAIR(60, 76), - lipedges=IDXPAIR(76, 82), - lipupper=IDXPAIR(77, 82), - liplower=IDXPAIR(83, 88), - lipinner=IDXPAIR(88, 96)) +index_map = Munch( + chin=IDXPAIR(0 + 8, 33 - 8), + eyebrows=IDXPAIR(33, 51), + eyebrowsedges=IDXPAIR(33, 46), + nose=IDXPAIR(51, 55), + nostrils=IDXPAIR(55, 60), + eyes=IDXPAIR(60, 76), + lipedges=IDXPAIR(76, 82), + lipupper=IDXPAIR(77, 82), + liplower=IDXPAIR(83, 88), + lipinner=IDXPAIR(88, 96)) OPPAIR = namedtuple('OPPAIR', 'shift resize') @@ -255,12 +261,13 @@ def preprocess(x): x = normalize(x) sw = H // 256 - operations = Munch(chin=OPPAIR(0, 3), - eyebrows=OPPAIR(-7 * sw, 2), - nostrils=OPPAIR(8 * sw, 4), - lipupper=OPPAIR(-8 * sw, 4), - liplower=OPPAIR(8 * sw, 4), - lipinner=OPPAIR(-2 * sw, 3)) + operations = Munch( + chin=OPPAIR(0, 3), + eyebrows=OPPAIR(-7 * sw, 2), + nostrils=OPPAIR(8 * sw, 4), + lipupper=OPPAIR(-8 * sw, 4), + liplower=OPPAIR(8 * sw, 4), + lipinner=OPPAIR(-2 * sw, 3)) for part, ops in operations.items(): start, end = index_map[part] @@ -268,8 +275,7 @@ def preprocess(x): zero_out = paddle.concat([ paddle.arange(0, index_map.chin.start), - paddle.arange(index_map.chin.end, 33), - paddle.to_tensor([ + paddle.arange(index_map.chin.end, 33), paddle.to_tensor([ index_map.eyebrowsedges.start, index_map.eyebrowsedges.end, index_map.lipedges.start, index_map.lipedges.end ]) @@ -291,10 +297,10 @@ def preprocess(x): # Second-level mask x2 = deepcopy(x) x2[:, index_map.chin.start:index_map.chin.end] = 0 # start:end was 0:33 - x2[:, index_map.lipedges.start:index_map.lipinner. - end] = 0 # start:end was 76:96 - x2[:, index_map.eyebrows.start:index_map.eyebrows. 
- end] = 0 # start:end was 33:51 + x2[:, index_map.lipedges.start: + index_map.lipinner.end] = 0 # start:end was 76:96 + x2[:, index_map.eyebrows.start: + index_map.eyebrows.end] = 0 # start:end was 33:51 x = paddle.sum(x, axis=1, keepdim=True) # (N, 1, H, W) x2 = paddle.sum(x2, axis=1, keepdim=True) # mask without faceline and mouth diff --git a/paddlers/models/ppgan/solver/lr_scheduler.py b/paddlers/models/ppgan/solver/lr_scheduler.py index ca68528a..41701a50 100644 --- a/paddlers/models/ppgan/solver/lr_scheduler.py +++ b/paddlers/models/ppgan/solver/lr_scheduler.py @@ -38,8 +38,8 @@ def __init__(self, learning_rate, start_epoch, decay_epochs, iters_per_epoch): def lambda_rule(epoch): epoch = epoch // iters_per_epoch - lr_l = 1.0 - max(0, - epoch + 1 - start_epoch) / float(decay_epochs + 1) + lr_l = 1.0 - max(0, epoch + 1 - + start_epoch) / float(decay_epochs + 1) return lr_l super().__init__(learning_rate, lambda_rule) @@ -66,6 +66,7 @@ class CosineAnnealingRestartLR(LRScheduler): eta_min (float): The minimum learning rate of the cosine annealing cycle. Default: 0. last_epoch (int): Used in paddle.nn._LRScheduler. Default: -1. """ + def __init__(self, learning_rate, periods, diff --git a/paddlers/models/ppgan/utils/audio.py b/paddlers/models/ppgan/utils/audio.py index 432305bd..ef4e07d1 100644 --- a/paddlers/models/ppgan/utils/audio.py +++ b/paddlers/models/ppgan/utils/audio.py @@ -81,10 +81,11 @@ def _stft(y): return _lws_processor(audio_config).stft(y).T else: librosa = try_import('librosa') - return librosa.stft(y=y, - n_fft=audio_config.n_fft, - hop_length=get_hop_size(), - win_length=audio_config.win_size) + return librosa.stft( + y=y, + n_fft=audio_config.n_fft, + hop_length=get_hop_size(), + win_length=audio_config.win_size) ########################################################## @@ -149,16 +150,15 @@ def _db_to_amp(x): def _normalize(S): if audio_config.allow_clipping_in_normalization: if audio_config.symmetric_mels: - return np.clip( - (2 * audio_config.max_abs_value) * - ((S - audio_config.min_level_db) / - (-audio_config.min_level_db)) - audio_config.max_abs_value, - -audio_config.max_abs_value, audio_config.max_abs_value) + return np.clip((2 * audio_config.max_abs_value) * ( + (S - audio_config.min_level_db) / + (-audio_config.min_level_db)) - audio_config.max_abs_value, + -audio_config.max_abs_value, + audio_config.max_abs_value) else: - return np.clip( - audio_config.max_abs_value * ((S - audio_config.min_level_db) / - (-audio_config.min_level_db)), 0, - audio_config.max_abs_value) + return np.clip(audio_config.max_abs_value * ( + (S - audio_config.min_level_db) / (-audio_config.min_level_db)), + 0, audio_config.max_abs_value) assert S.max() <= 0 and S.min() - audio_config.min_level_db >= 0 if audio_config.symmetric_mels: @@ -166,18 +166,18 @@ def _normalize(S): (S - audio_config.min_level_db) / (-audio_config.min_level_db)) - audio_config.max_abs_value else: - return audio_config.max_abs_value * ((S - audio_config.min_level_db) / - (-audio_config.min_level_db)) + return audio_config.max_abs_value * ( + (S - audio_config.min_level_db) / (-audio_config.min_level_db)) def _denormalize(D): if audio_config.allow_clipping_in_normalization: if audio_config.symmetric_mels: - return (((np.clip(D, -audio_config.max_abs_value, - audio_config.max_abs_value) + - audio_config.max_abs_value) * -audio_config.min_level_db / - (2 * audio_config.max_abs_value)) + - audio_config.min_level_db) + return ( + ((np.clip(D, -audio_config.max_abs_value, + audio_config.max_abs_value) + +
audio_config.max_abs_value) * -audio_config.min_level_db / + (2 * audio_config.max_abs_value)) + audio_config.min_level_db) else: return ((np.clip(D, 0, audio_config.max_abs_value) * -audio_config.min_level_db / audio_config.max_abs_value) + diff --git a/paddlers/models/ppgan/utils/filesystem.py b/paddlers/models/ppgan/utils/filesystem.py index 9b0ce88b..b32184f9 100644 --- a/paddlers/models/ppgan/utils/filesystem.py +++ b/paddlers/models/ppgan/utils/filesystem.py @@ -32,9 +32,7 @@ def convert(state_dict): model_dict = {} for k, v in state_dict.items(): - if isinstance( - v, - (paddle.static.Variable, paddle.Tensor)): + if isinstance(v, (paddle.static.Variable, paddle.Tensor)): model_dict[k] = v.numpy() else: model_dict[k] = v @@ -43,9 +41,7 @@ def convert(state_dict): final_dict = {} for k, v in state_dicts.items(): - if isinstance( - v, - (paddle.static.Variable, paddle.Tensor)): + if isinstance(v, (paddle.static.Variable, paddle.Tensor)): final_dict = convert(state_dicts) break elif isinstance(v, dict): diff --git a/paddlers/models/ppgan/utils/image_pool.py b/paddlers/models/ppgan/utils/image_pool.py index e76dcead..956b32a3 100644 --- a/paddlers/models/ppgan/utils/image_pool.py +++ b/paddlers/models/ppgan/utils/image_pool.py @@ -25,6 +25,7 @@ class ImagePool(): Args: pool_size (int) -- the size of image buffer, if pool_size=0, no buffer will be created """ + def __init__(self, pool_size, prob=0.5): self.pool_size = pool_size self.prob = prob diff --git a/paddlers/models/ppgan/utils/photopen.py b/paddlers/models/ppgan/utils/photopen.py index a8fdf263..82b6e657 100644 --- a/paddlers/models/ppgan/utils/photopen.py +++ b/paddlers/models/ppgan/utils/photopen.py @@ -20,15 +20,22 @@ import functools from paddle.nn import Conv1DTranspose, Conv2DTranspose, Conv3DTranspose, Linear + # Process image data: crop, horizontally flip, reshape, and normalize -def data_transform(img, resize_w, resize_h, load_size=286, pos=[0, 0, 256, 256], flip=True, is_image=True): +def data_transform(img, + resize_w, + resize_h, + load_size=286, + pos=[0, 0, 256, 256], + flip=True, + is_image=True): if is_image: resized = img.resize((resize_w, resize_h), Image.BICUBIC) else: resized = img.resize((resize_w, resize_h), Image.NEAREST) croped = resized.crop((pos[0], pos[1], pos[2], pos[3])) fliped = ImageOps.mirror(croped) if flip else croped - fliped = np.array(fliped) # transform to numpy array + fliped = np.array(fliped) # transform to numpy array expanded = np.expand_dims(fliped, 2) if len(fliped.shape) < 3 else fliped transposed = np.transpose(expanded, (2, 0, 1)).astype('float32') if is_image: @@ -37,34 +44,55 @@ def data_transform(img, resize_w, resize_h, load_size=286, pos=[0, 0, 256, 256], flip=True, is_image=True): normalized = transposed return normalized + # Define the COCO dataset object class COCODateset(Dataset): def __init__(self, opt): super(COCODateset, self).__init__() - inst_dir = opt.dataroot+'train_inst/' + inst_dir = opt.dataroot + 'train_inst/' _, _, inst_list = next(os.walk(inst_dir)) self.inst_list = np.sort(inst_list) self.opt = opt def __getitem__(self, idx): - ins = Image.open(self.opt.dataroot+'train_inst/'+self.inst_list[idx]) - img = Image.open(self.opt.dataroot+'train_img/'+self.inst_list[idx].replace(".png", ".jpg")) + ins = Image.open(self.opt.dataroot + 'train_inst/' + self.inst_list[ + idx]) + img = Image.open(self.opt.dataroot + 'train_img/' + self.inst_list[idx] + .replace(".png", ".jpg")) img = img.convert('RGB') w, h = img.size resize_w, resize_h = 0, 0 if w < h: - resize_w, resize_h =
self.opt.load_size, int(h * + self.opt.load_size / w) else: - resize_w, resize_h = int(w * self.opt.load_size / h), self.opt.load_size + resize_w, resize_h = int(w * self.opt.load_size / + h), self.opt.load_size left = random.randint(0, resize_w - self.opt.crop_size) top = random.randint(0, resize_h - self.opt.crop_size) flip = False - - img = data_transform(img, resize_w, resize_h, load_size=opt.load_size, - pos=[left, top, left + self.opt.crop_size, top + self.opt.crop_size], flip=flip, is_image=True) - ins = data_transform(ins, resize_w, resize_h, load_size=opt.load_size, - pos=[left, top, left + self.opt.crop_size, top + self.opt.crop_size], flip=flip, is_image=False) + + img = data_transform( + img, + resize_w, + resize_h, + load_size=opt.load_size, + pos=[ + left, top, left + self.opt.crop_size, top + self.opt.crop_size + ], + flip=flip, + is_image=True) + ins = data_transform( + ins, + resize_w, + resize_h, + load_size=opt.load_size, + pos=[ + left, top, left + self.opt.crop_size, top + self.opt.crop_size + ], + flip=flip, + is_image=False) return img, ins, self.inst_list[idx] @@ -85,14 +113,17 @@ def data_onehot_pro(instance, opt): edge = np.zeros(instance.shape, 'int64') t = instance.numpy() edge[:, :, :, 1:] = edge[:, :, :, 1:] | (t[:, :, :, 1:] != t[:, :, :, :-1]) - edge[:, :, :, :-1] = edge[:, :, :, :-1] | (t[:, :, :, 1:] != t[:, :, :, :-1]) + edge[:, :, :, :-1] = edge[:, :, :, :-1] | ( + t[:, :, :, 1:] != t[:, :, :, :-1]) edge[:, :, 1:, :] = edge[:, :, 1:, :] | (t[:, :, 1:, :] != t[:, :, :-1, :]) - edge[:, :, :-1, :] = edge[:, :, :-1, :] | (t[:, :, 1:, :] != t[:, :, :-1, :]) + edge[:, :, :-1, :] = edge[:, :, :-1, :] | ( + t[:, :, 1:, :] != t[:, :, :-1, :]) edge = paddle.to_tensor(edge).astype('float32') semantics = paddle.concat([semantics, edge], 1) return semantics + # 设置除 spade 以外的归一化层 def build_norm_layer(norm_type='instance'): """Return a normalization layer @@ -105,17 +136,12 @@ def build_norm_layer(norm_type='instance'): """ if norm_type == 'batch': norm_layer = functools.partial( - nn.BatchNorm2D, - weight_attr=False, - bias_attr=False) + nn.BatchNorm2D, weight_attr=False, bias_attr=False) elif norm_type == 'syncbatch': norm_layer = functools.partial( - nn.SyncBatchNorm, - weight_attr=False, - bias_attr=False) + nn.SyncBatchNorm, weight_attr=False, bias_attr=False) elif norm_type == 'instance': - norm_layer = functools.partial( - nn.InstanceNorm2D,) + norm_layer = functools.partial(nn.InstanceNorm2D, ) elif norm_type == 'spectral': norm_layer = functools.partial(Spectralnorm) elif norm_type == 'none': @@ -127,14 +153,16 @@ def norm_layer(x): norm_type) return norm_layer + def simam(x, e_lambda=1e-4): b, c, h, w = x.shape n = w * h - 1 - x_minus_mu_square = (x - x.mean(axis=[2, 3], keepdim=True)) ** 2 - y = x_minus_mu_square / (4 * (x_minus_mu_square.sum(axis=[2, 3], keepdim=True) / n + e_lambda)) + 0.5 + x_minus_mu_square = (x - x.mean(axis=[2, 3], keepdim=True))**2 + y = x_minus_mu_square / (4 * (x_minus_mu_square.sum( + axis=[2, 3], keepdim=True) / n + e_lambda)) + 0.5 return x * nn.functional.sigmoid(y) + class Dict(dict): __setattr__ = dict.__setitem__ __getattr__ = dict.__getitem__ - diff --git a/paddlers/models/ppgan/utils/setup.py b/paddlers/models/ppgan/utils/setup.py index 78df1092..b7301fcc 100644 --- a/paddlers/models/ppgan/utils/setup.py +++ b/paddlers/models/ppgan/utils/setup.py @@ -19,6 +19,7 @@ import random from .logger import setup_logger + def setup(args, cfg): if args.evaluate_only: cfg.is_train = False @@ -44,10 +45,9 @@ def setup(args, cfg): 
paddle.set_device('gpu') else: paddle.set_device('cpu') - + if args.seed: paddle.seed(args.seed) random.seed(args.seed) - np.random.seed(args.seed) + np.random.seed(args.seed) paddle.framework.random._manual_program_seed(args.seed) - diff --git a/paddlers/models/ppgan/utils/visual.py b/paddlers/models/ppgan/utils/visual.py index 69826346..b0cea537 100644 --- a/paddlers/models/ppgan/utils/visual.py +++ b/paddlers/models/ppgan/utils/visual.py @@ -36,8 +36,8 @@ def make_grid(tensor, nrow=8, normalize=False, range=None, scale_each=False): images separately rather than the (min, max) over all images. Default: ``False``. """ if not (isinstance(tensor, paddle.Tensor) or - (isinstance(tensor, list) - and all(isinstance(t, paddle.Tensor) for t in tensor))): + (isinstance(tensor, list) and + all(isinstance(t, paddle.Tensor) for t in tensor))): raise TypeError('tensor or list of tensors expected, got {}'.format( type(tensor))) @@ -92,15 +92,15 @@ def norm_range(t, range): ymaps = int(math.ceil(float(nmaps) / xmaps)) height, width = int(tensor.shape[2]), int(tensor.shape[3]) num_channels = tensor.shape[1] - canvas = paddle.zeros((num_channels, height * ymaps, width * xmaps), - dtype=tensor.dtype) + canvas = paddle.zeros( + (num_channels, height * ymaps, width * xmaps), dtype=tensor.dtype) k = 0 for y in irange(ymaps): for x in irange(xmaps): if k >= nmaps: break - canvas[:, y * height:(y + 1) * height, - x * width:(x + 1) * width] = tensor[k] + canvas[:, y * height:(y + 1) * height, x * width:(x + 1) * + width] = tensor[k] k = k + 1 return canvas @@ -113,6 +113,7 @@ def tensor2img(input_image, min_max=(-1., 1.), image_num=1, imtype=np.uint8): image_num (int): the convert iamge numbers imtype (type): the desired type of the converted numpy array """ + def processing(img, transpose=True): """"processing one numpy image. 
@@ -152,7 +153,8 @@ def processing(img, transpose=True): else: # for more image, log NCHW image image_numpy = np.stack( - [processing(im, transpose=False) for im in image_numpy]) + [processing( + im, transpose=False) for im in image_numpy]) else: # if it is a numpy array, do nothing diff --git a/paddlers/models/ppseg/core/infer.py b/paddlers/models/ppseg/core/infer.py index a66288d7..cacdb9dd 100644 --- a/paddlers/models/ppseg/core/infer.py +++ b/paddlers/models/ppseg/core/infer.py @@ -64,10 +64,10 @@ def get_reverse_list(ori_shape, transforms): else: w = long_edge h = short_edge - if op.__class__.__name__ in ['Padding']: + if op.__class__.__name__ in ['Pad']: reverse_list.append(('padding', (h, w))) w, h = op.target_size[0], op.target_size[1] - if op.__class__.__name__ in ['PaddingByAspectRatio']: + if op.__class__.__name__ in ['PadByAspectRatio']: reverse_list.append(('padding', (h, w))) ratio = w / h if ratio == op.aspect_ratio: diff --git a/paddlers/models/ppseg/datasets/dataset.py b/paddlers/models/ppseg/datasets/dataset.py index a9fab0d8..b4faf6a1 100644 --- a/paddlers/models/ppseg/datasets/dataset.py +++ b/paddlers/models/ppseg/datasets/dataset.py @@ -49,7 +49,7 @@ class Dataset(paddle.io.Dataset): import paddlers.models.ppseg.transforms as T from paddlers.models.ppseg.datasets import Dataset - transforms = [T.RandomPaddingCrop(crop_size=(512,512)), T.Normalize()] + transforms = [T.RandomPadCrop(crop_size=(512,512)), T.Normalize()] dataset_root = 'dataset_root_path' train_path = 'train_path' num_classes = 2 diff --git a/paddlers/models/ppseg/models/__init__.py b/paddlers/models/ppseg/models/__init__.py index 62b3cc64..4a7d535b 100644 --- a/paddlers/models/ppseg/models/__init__.py +++ b/paddlers/models/ppseg/models/__init__.py @@ -54,4 +54,4 @@ from .enet import ENet from .bisenetv1 import BiseNetV1 from .fastfcn import FastFCN -from .pfpnnet import PFPNNet \ No newline at end of file +from .pfpnnet import PFPNNet diff --git a/paddlers/models/ppseg/transforms/transforms.py b/paddlers/models/ppseg/transforms/transforms.py index b5bb6352..5f2b6406 100644 --- a/paddlers/models/ppseg/transforms/transforms.py +++ b/paddlers/models/ppseg/transforms/transforms.py @@ -480,7 +480,7 @@ def __call__(self, im, label=None): @manager.TRANSFORMS.add_component -class Padding: +class Pad: """ Add bottom-right padding to a raw image or annotation image. @@ -560,7 +560,7 @@ def __call__(self, im, label=None): @manager.TRANSFORMS.add_component -class PaddingByAspectRatio: +class PadByAspectRatio: """ Args: @@ -597,15 +597,14 @@ def __call__(self, im, label=None): img_height = int(img_width / self.aspect_ratio) else: img_width = int(img_height * self.aspect_ratio) - padding = Padding( - (img_width, img_height), - im_padding_value=self.im_padding_value, - label_padding_value=self.label_padding_value) + padding = Pad((img_width, img_height), + im_padding_value=self.im_padding_value, + label_padding_value=self.label_padding_value) return padding(im, label) @manager.TRANSFORMS.add_component -class RandomPaddingCrop: +class RandomPadCrop: """ Crop a sub-image from a raw image and annotation image randomly. If the target cropping size is larger than original image, then the bottom-right padding will be added. @@ -768,7 +767,7 @@ def __call__(self, im, label=None): @manager.TRANSFORMS.add_component -class ScalePadding: +class ScalePad: """ Add center padding to a raw image or annotation image,then scale the image to target size. 
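For reference, a minimal usage sketch of the renamed ppseg padding transforms (class names per the hunks above; constructor arguments follow the docstring example in dataset.py and the PadByAspectRatio call site, and are otherwise assumed unchanged by this patch):

import paddlers.models.ppseg.transforms as T

# Pipelines written against the old names switch as follows;
# only the class names change, not the behavior.
transforms = [
    T.RandomPadCrop(crop_size=(512, 512)),  # formerly RandomPaddingCrop
    T.Pad((1024, 1024)),                    # formerly Padding
    T.Normalize(),
]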
diff --git a/paddlers/tasks/__init__.py b/paddlers/tasks/__init__.py index a835a5cc..ca50e606 100644 --- a/paddlers/tasks/__init__.py +++ b/paddlers/tasks/__init__.py @@ -17,4 +17,4 @@ from .change_detector import * from .classifier import * from .load_model import load_model -from .image_restorer import * \ No newline at end of file +from .image_restorer import * diff --git a/paddlers/tasks/change_detector.py b/paddlers/tasks/change_detector.py index 9035127b..21eac252 100644 --- a/paddlers/tasks/change_detector.py +++ b/paddlers/tasks/change_detector.py @@ -29,7 +29,7 @@ import paddlers.utils.logging as logging import paddlers.models.ppseg as paddleseg from paddlers.transforms import arrange_transforms -from paddlers.transforms import ImgDecoder, Resize +from paddlers.transforms import Resize, decode_image from paddlers.utils import get_single_card_bs, DisablePrint from paddlers.utils.checkpoint import seg_pretrain_weights_dict from .base import BaseModel @@ -501,9 +501,9 @@ def predict(self, img_file, transforms=None): Do inference. Args: Args: - img_file(List[tuple], Tuple[str or np.ndarray]): - Tuple of image paths or decoded image data in a BGR format for bi-temporal images, which also could constitute - a list, meaning all image pairs to be predicted as a mini-batch. + img_file (list[tuple] | tuple[str | np.ndarray]): + Tuple of image paths or decoded image data for bi-temporal images, which also could constitute a list, + meaning all image pairs to be predicted as a mini-batch. transforms(paddlers.transforms.Compose or None, optional): Transforms for inputs. If None, the transforms for evaluation process will be used. Defaults to None. @@ -546,19 +546,24 @@ def predict(self, img_file, transforms=None): } return prediction - def slider_predict(self, img_file, save_dir, block_size, overlap=36, transforms=None): + def slider_predict(self, + img_file, + save_dir, + block_size, + overlap=36, + transforms=None): """ Do inference. Args: Args: - img_file(List[str]): + img_file(list[str]): List of image paths. save_dir(str): Directory that contains saved geotiff file. - block_size(List[int] or Tuple[int], int): - The size of block. - overlap(List[int] or Tuple[int], int): - The overlap between two blocks. Defaults to 36. + block_size(list[int] | tuple[int] | int): + Size of block. + overlap(list[int] | tuple[int] | int, optional): + Overlap between two blocks. Defaults to 36. transforms(paddlers.transforms.Compose or None, optional): Transforms for inputs. If None, the transforms for evaluation process will be used. Defaults to None.
""" @@ -566,7 +571,7 @@ def slider_predict(self, img_file, save_dir, block_size, overlap=36, transforms= from osgeo import gdal except: import gdal - + if len(img_file) != 2: raise ValueError("`img_file` must be a list of length 2.") if isinstance(block_size, int): @@ -574,13 +579,15 @@ def slider_predict(self, img_file, save_dir, block_size, overlap=36, transforms= elif isinstance(block_size, (tuple, list)) and len(block_size) == 2: block_size = tuple(block_size) else: - raise ValueError("`block_size` must be a tuple/list of length 2 or an integer.") + raise ValueError( + "`block_size` must be a tuple/list of length 2 or an integer.") if isinstance(overlap, int): overlap = (overlap, overlap) elif isinstance(overlap, (tuple, list)) and len(overlap) == 2: overlap = tuple(overlap) else: - raise ValueError("`overlap` must be a tuple/list of length 2 or an integer.") + raise ValueError( + "`overlap` must be a tuple/list of length 2 or an integer.") src1_data = gdal.Open(img_file[0]) src2_data = gdal.Open(img_file[1]) @@ -589,7 +596,8 @@ def slider_predict(self, img_file, save_dir, block_size, overlap=36, transforms= bands = src1_data.RasterCount driver = gdal.GetDriverByName("GTiff") - file_name = osp.splitext(osp.normpath(img_file[0]).split(os.sep)[-1])[0] + ".tif" + file_name = osp.splitext(osp.normpath(img_file[0]).split(os.sep)[-1])[ + 0] + ".tif" if not osp.exists(save_dir): os.makedirs(save_dir) save_file = osp.join(save_dir, file_name) @@ -607,17 +615,21 @@ def slider_predict(self, img_file, save_dir, block_size, overlap=36, transforms= xsize = int(width - xoff) if yoff + ysize > height: ysize = int(height - yoff) - im1 = src1_data.ReadAsArray(int(xoff), int(yoff), xsize, ysize).transpose((1, 2, 0)) - im2 = src2_data.ReadAsArray(int(xoff), int(yoff), xsize, ysize).transpose((1, 2, 0)) + im1 = src1_data.ReadAsArray( + int(xoff), int(yoff), xsize, ysize).transpose((1, 2, 0)) + im2 = src2_data.ReadAsArray( + int(xoff), int(yoff), xsize, ysize).transpose((1, 2, 0)) # fill h, w = im1.shape[:2] - im1_fill = np.zeros((block_size[1], block_size[0], bands), dtype=im1.dtype) + im1_fill = np.zeros( + (block_size[1], block_size[0], bands), dtype=im1.dtype) im2_fill = im1_fill.copy() im1_fill[:h, :w, :] = im1 im2_fill[:h, :w, :] = im2 im_fill = (im1_fill, im2_fill) # predict - pred = self.predict(im_fill, transforms)["label_map"].astype("uint8") + pred = self.predict(im_fill, + transforms)["label_map"].astype("uint8") # overlap rd_block = band.ReadAsArray(int(xoff), int(yoff), xsize, ysize) mask = (rd_block == pred[:h, :w]) | (rd_block == 255) @@ -634,13 +646,12 @@ def _preprocess(self, images, transforms, to_tensor=True): batch_im1, batch_im2 = list(), list() batch_ori_shape = list() for im1, im2 in images: - sample = {'image_t1': im1, 'image_t2': im2} - if isinstance(sample['image_t1'], str) or \ - isinstance(sample['image_t2'], str): - sample = ImgDecoder(to_rgb=False)(sample) - ori_shape = sample['image'].shape[:2] - else: - ori_shape = im1.shape[:2] + if isinstance(im1, str) or isinstance(im2, str): + im1 = decode_image(im1, to_rgb=False) + im2 = decode_image(im2, to_rgb=False) + ori_shape = im1.shape[:2] + # XXX: sample do not contain 'image_t1' and 'image_t2'. 
+ sample = {'image': im1, 'image2': im2} im1, im2 = transforms(sample)[:2] batch_im1.append(im1) batch_im2.append(im2) @@ -679,7 +690,7 @@ def get_transforms_shape_info(batch_ori_shape, transforms): scale = float(op.long_size) / float(im_long_size) h = int(round(h * scale)) w = int(round(w * scale)) - elif op.__class__.__name__ == 'Padding': + elif op.__class__.__name__ == 'Pad': if op.target_size: target_h, target_w = op.target_size else: diff --git a/paddlers/tasks/classifier.py b/paddlers/tasks/classifier.py index 84564b69..c29d73f0 100644 --- a/paddlers/tasks/classifier.py +++ b/paddlers/tasks/classifier.py @@ -33,7 +33,7 @@ from paddlers.models.ppcls.loss import build_loss from paddlers.models.ppcls.data.postprocess import build_postprocess from paddlers.utils.checkpoint import cls_pretrain_weights_dict -from paddlers.transforms import ImgDecoder, Resize +from paddlers.transforms import Resize, decode_image __all__ = [ "ResNet50_vd", "MobileNetV3_small_x1_0", "HRNet_W18_C", "CondenseNetV2_b" @@ -410,9 +410,9 @@ def predict(self, img_file, transforms=None): Do inference. Args: Args: - img_file(List[np.ndarray or str], str or np.ndarray): - Image path or decoded image data in a BGR format, which also could constitute a list, - meaning all images to be predicted as a mini-batch. + img_file(list[np.ndarray | str] | str | np.ndarray): + Image path or decoded image data, which also could constitute a list, meaning all images to be + predicted as a mini-batch. transforms(paddlers.transforms.Compose or None, optional): Transforms for inputs. If None, the transforms for evaluation process will be used. Defaults to None. @@ -465,10 +465,10 @@ def _preprocess(self, images, transforms, to_tensor=True): batch_im = list() batch_ori_shape = list() for im in images: + if isinstance(im, str): + im = decode_image(im, to_rgb=False) + ori_shape = im.shape[:2] sample = {'image': im} - if isinstance(sample['image'], str): - sample = ImgDecoder(to_rgb=False)(sample) - ori_shape = sample['image'].shape[:2] im = transforms(sample) batch_im.append(im) batch_ori_shape.append(ori_shape) @@ -504,7 +504,7 @@ def get_transforms_shape_info(batch_ori_shape, transforms): scale = float(op.long_size) / float(im_long_size) h = int(round(h * scale)) w = int(round(w * scale)) - elif op.__class__.__name__ == 'Padding': + elif op.__class__.__name__ == 'Pad': if op.target_size: target_h, target_w = op.target_size else: diff --git a/paddlers/tasks/object_detector.py b/paddlers/tasks/object_detector.py index 0b8c74d3..f6ac956e 100644 --- a/paddlers/tasks/object_detector.py +++ b/paddlers/tasks/object_detector.py @@ -27,9 +27,10 @@ from paddlers.models.ppdet.modeling.proposal_generator.target_layer import BBoxAssigner, MaskAssigner import paddlers import paddlers.utils.logging as logging -from paddlers.transforms.operators import _NormalizeBox, _PadBox, _BboxXYXY2XYWH, Resize, Padding +from paddlers.transforms import decode_image +from paddlers.transforms.operators import _NormalizeBox, _PadBox, _BboxXYXY2XYWH, Resize, Pad from paddlers.transforms.batch_operators import BatchCompose, BatchRandomResize, BatchRandomResizeByShort, \ - _BatchPadding, _Gt2YoloTarget + _BatchPad, _Gt2YoloTarget from paddlers.transforms import arrange_transforms from .base import BaseModel from .utils.det_metrics import VOCMetric, COCOMetric @@ -37,8 +38,7 @@ from paddlers.utils.checkpoint import det_pretrain_weights_dict __all__ = [ - "YOLOv3", "FasterRCNN", "PPYOLO", "PPYOLOTiny", "PPYOLOv2", "MaskRCNN", - "PicoDet" + "YOLOv3", "FasterRCNN", 
"PPYOLO", "PPYOLOTiny", "PPYOLOv2", "MaskRCNN" ] @@ -511,9 +511,9 @@ def predict(self, img_file, transforms=None): """ Do inference. Args: - img_file(List[np.ndarray or str], str or np.ndarray): - Image path or decoded image data in a BGR format, which also could constitute a list, - meaning all images to be predicted as a mini-batch. + img_file(list[np.ndarray | str] | str | np.ndarray): + Image path or decoded image data, which also could constitute a list,meaning all images to be + predicted as a mini-batch. transforms(paddlers.transforms.Compose or None, optional): Transforms for inputs. If None, the transforms for evaluation process will be used. Defaults to None. Returns: @@ -549,8 +549,11 @@ def _preprocess(self, images, transforms, to_tensor=True): model_type=self.model_type, transforms=transforms, mode='test') batch_samples = list() for im in images: + if isinstance(im, str): + im = decode_image(im, to_rgb=False) sample = {'image': im} - batch_samples.append(transforms(sample)) + sample = transforms(sample) + batch_samples.append(sample) batch_transforms = self._compose_batch_transform(transforms, 'test') batch_samples = batch_transforms(batch_samples) if to_tensor: @@ -757,7 +760,7 @@ def __init__(self, model_name='PicoDet', num_classes=num_classes, **params) def _compose_batch_transform(self, transforms, mode='train'): - default_batch_transforms = [_BatchPadding(pad_to_stride=32)] + default_batch_transforms = [_BatchPad(pad_to_stride=32)] if mode == 'eval': collate_batch = True else: @@ -1005,7 +1008,7 @@ def __init__(self, def _compose_batch_transform(self, transforms, mode='train'): if mode == 'train': default_batch_transforms = [ - _BatchPadding(pad_to_stride=-1), _NormalizeBox(), + _BatchPad(pad_to_stride=-1), _NormalizeBox(), _PadBox(getattr(self, 'num_max_boxes', 50)), _BboxXYXY2XYWH(), _Gt2YoloTarget( anchor_masks=self.anchor_masks, @@ -1015,7 +1018,7 @@ def _compose_batch_transform(self, transforms, mode='train'): num_classes=self.num_classes) ] else: - default_batch_transforms = [_BatchPadding(pad_to_stride=-1)] + default_batch_transforms = [_BatchPad(pad_to_stride=-1)] if mode == 'eval' and self.metric == 'voc': collate_batch = False else: @@ -1362,11 +1365,11 @@ def train(self, def _compose_batch_transform(self, transforms, mode='train'): if mode == 'train': default_batch_transforms = [ - _BatchPadding(pad_to_stride=32 if self.with_fpn else -1) + _BatchPad(pad_to_stride=32 if self.with_fpn else -1) ] else: default_batch_transforms = [ - _BatchPadding(pad_to_stride=32 if self.with_fpn else -1) + _BatchPad(pad_to_stride=32 if self.with_fpn else -1) ] custom_batch_transforms = [] for i, op in enumerate(transforms.transforms): @@ -1408,7 +1411,7 @@ def _fix_transforms_shape(self, image_shape): self.test_transforms.transforms[resize_op_idx] = Resize( target_size=image_shape, keep_ratio=True, interp='CUBIC') self.test_transforms.transforms.append( - Padding(im_padding_value=[0., 0., 0.])) + Pad(im_padding_value=[0., 0., 0.])) def _get_test_inputs(self, image_shape): if image_shape is not None: @@ -1418,7 +1421,7 @@ def _get_test_inputs(self, image_shape): image_shape = [None, 3, -1, -1] if self.with_fpn: self.test_transforms.transforms.append( - Padding(im_padding_value=[0., 0., 0.])) + Pad(im_padding_value=[0., 0., 0.])) self.fixed_input_shape = image_shape return self._define_input_spec(image_shape) @@ -2187,11 +2190,11 @@ def train(self, def _compose_batch_transform(self, transforms, mode='train'): if mode == 'train': default_batch_transforms = [ - 
_BatchPadding(pad_to_stride=32 if self.with_fpn else -1) + _BatchPad(pad_to_stride=32 if self.with_fpn else -1) ] else: default_batch_transforms = [ - _BatchPadding(pad_to_stride=32 if self.with_fpn else -1) + _BatchPad(pad_to_stride=32 if self.with_fpn else -1) ] custom_batch_transforms = [] for i, op in enumerate(transforms.transforms): @@ -2233,7 +2236,7 @@ def _fix_transforms_shape(self, image_shape): self.test_transforms.transforms[resize_op_idx] = Resize( target_size=image_shape, keep_ratio=True, interp='CUBIC') self.test_transforms.transforms.append( - Padding(im_padding_value=[0., 0., 0.])) + Pad(im_padding_value=[0., 0., 0.])) def _get_test_inputs(self, image_shape): if image_shape is not None: @@ -2243,7 +2246,7 @@ image_shape = [None, 3, -1, -1] if self.with_fpn: self.test_transforms.transforms.append( - Padding(im_padding_value=[0., 0., 0.])) + Pad(im_padding_value=[0., 0., 0.])) self.fixed_input_shape = image_shape return self._define_input_spec(image_shape) diff --git a/paddlers/tasks/segmenter.py b/paddlers/tasks/segmenter.py index 32b7dd0a..f55c772e 100644 --- a/paddlers/tasks/segmenter.py +++ b/paddlers/tasks/segmenter.py @@ -32,7 +32,7 @@ from .base import BaseModel from .utils import seg_metrics as metrics from paddlers.utils.checkpoint import seg_pretrain_weights_dict -from paddlers.transforms import ImgDecoder, Resize +from paddlers.transforms import Resize, decode_image __all__ = ["UNet", "DeepLabV3P", "FastSCNN", "HRNet", "BiSeNetV2", "FarSeg"] @@ -478,9 +478,9 @@ def predict(self, img_file, transforms=None): Do inference. Args: Args: - img_file(List[np.ndarray or str], str or np.ndarray): - Image path or decoded image data in a BGR format, which also could constitute a list, - meaning all images to be predicted as a mini-batch. + img_file(list[np.ndarray | str] | str | np.ndarray): + Image path or decoded image data, which also could constitute a list, meaning all images to be + predicted as a mini-batch. transforms(paddlers.transforms.Compose or None, optional): Transforms for inputs. If None, the transforms for evaluation process will be used. Defaults to None. @@ -519,7 +519,12 @@ def predict(self, img_file, transforms=None): } return prediction - def slider_predict(self, img_file, save_dir, block_size, overlap=36, transforms=None): + def slider_predict(self, + img_file, + save_dir, + block_size, + overlap=36, + transforms=None): """ Do inference. Args: @@ -528,10 +533,10 @@ def slider_predict(self, img_file, save_dir, block_size, overlap=36, transforms= Image path. save_dir(str): Directory that contains saved geotiff file. - block_size(List[int] or Tuple[int], int): - The size of block. - overlap(List[int] or Tuple[int], int): - The overlap between two blocks. Defaults to 36. + block_size(list[int] | tuple[int] | int): + Size of block. + overlap(list[int] | tuple[int] | int, optional): + Overlap between two blocks. Defaults to 36. transforms(paddlers.transforms.Compose or None, optional): Transforms for inputs. If None, the transforms for evaluation process will be used. Defaults to None.
""" @@ -539,19 +544,21 @@ def slider_predict(self, img_file, save_dir, block_size, overlap=36, transforms= from osgeo import gdal except: import gdal - + if isinstance(block_size, int): block_size = (block_size, block_size) elif isinstance(block_size, (tuple, list)) and len(block_size) == 2: block_size = tuple(block_size) else: - raise ValueError("`block_size` must be a tuple/list of length 2 or an integer.") + raise ValueError( + "`block_size` must be a tuple/list of length 2 or an integer.") if isinstance(overlap, int): overlap = (overlap, overlap) elif isinstance(overlap, (tuple, list)) and len(overlap) == 2: overlap = tuple(overlap) else: - raise ValueError("`overlap` must be a tuple/list of length 2 or an integer.") + raise ValueError( + "`overlap` must be a tuple/list of length 2 or an integer.") src_data = gdal.Open(img_file) width = src_data.RasterXSize @@ -559,7 +566,8 @@ def slider_predict(self, img_file, save_dir, block_size, overlap=36, transforms= bands = src_data.RasterCount driver = gdal.GetDriverByName("GTiff") - file_name = osp.splitext(osp.normpath(img_file).split(os.sep)[-1])[0] + ".tif" + file_name = osp.splitext(osp.normpath(img_file).split(os.sep)[-1])[ + 0] + ".tif" if not osp.exists(save_dir): os.makedirs(save_dir) save_file = osp.join(save_dir, file_name) @@ -577,13 +585,16 @@ def slider_predict(self, img_file, save_dir, block_size, overlap=36, transforms= xsize = int(width - xoff) if yoff + ysize > height: ysize = int(height - yoff) - im = src_data.ReadAsArray(int(xoff), int(yoff), xsize, ysize).transpose((1, 2, 0)) + im = src_data.ReadAsArray(int(xoff), int(yoff), xsize, + ysize).transpose((1, 2, 0)) # fill h, w = im.shape[:2] - im_fill = np.zeros((block_size[1], block_size[0], bands), dtype=im.dtype) + im_fill = np.zeros( + (block_size[1], block_size[0], bands), dtype=im.dtype) im_fill[:h, :w, :] = im # predict - pred = self.predict(im_fill, transforms)["label_map"].astype("uint8") + pred = self.predict(im_fill, + transforms)["label_map"].astype("uint8") # overlap rd_block = band.ReadAsArray(int(xoff), int(yoff), xsize, ysize) mask = (rd_block == pred[:h, :w]) | (rd_block == 255) @@ -600,10 +611,10 @@ def _preprocess(self, images, transforms, to_tensor=True): batch_im = list() batch_ori_shape = list() for im in images: + if isinstance(im, str): + im = decode_image(im, to_rgb=False) + ori_shape = im.shape[:2] sample = {'image': im} - if isinstance(sample['image'], str): - sample = ImgDecoder(to_rgb=False)(sample) - ori_shape = sample['image'].shape[:2] im = transforms(sample)[0] batch_im.append(im) batch_ori_shape.append(ori_shape) @@ -639,7 +650,7 @@ def get_transforms_shape_info(batch_ori_shape, transforms): scale = float(op.long_size) / float(im_long_size) h = int(round(h * scale)) w = int(round(w * scale)) - elif op.__class__.__name__ == 'Padding': + elif op.__class__.__name__ == 'Pad': if op.target_size: target_h, target_w = op.target_size else: diff --git a/paddlers/tasks/slim/prune.py b/paddlers/tasks/slim/prune.py index 9792d0ff..10df7fec 100644 --- a/paddlers/tasks/slim/prune.py +++ b/paddlers/tasks/slim/prune.py @@ -41,7 +41,7 @@ def _pruner_template_input(sample, model_type): def sensitive_prune(pruner, pruned_flops, skip_vars=[], align=None): - # skip depthwise convolutions + # Skip depthwise convolutions for layer in pruner.model.sublayers(): if isinstance(layer, paddle.nn.layer.conv.Conv2D) and layer._groups > 1: for param in layer.parameters(include_sublayers=False): diff --git a/paddlers/tasks/utils/det_metrics/coco_utils.py 
b/paddlers/tasks/utils/det_metrics/coco_utils.py index c4a024fa..f62e8bf5 100644 --- a/paddlers/tasks/utils/det_metrics/coco_utils.py +++ b/paddlers/tasks/utils/det_metrics/coco_utils.py @@ -35,6 +35,7 @@ def get_infer_results(outs, catid, bias=0): For example, bbox result is a list and each element contains image_id, category_id, bbox and score. """ + if outs is None or len(outs) == 0: raise ValueError( 'The number of valid detection result if zero. Please use reasonable model and check input data.' @@ -78,6 +79,7 @@ def cocoapi_eval(anns, max_dets (tuple): COCO evaluation maxDets. classwise (bool): Whether per-category AP and draw P-R Curve or not. """ + assert coco_gt is not None or anno_file is not None from pycocotools.coco import COCO from pycocotools.cocoeval import COCOeval @@ -220,19 +222,19 @@ def loadRes(coco_obj, anns): def makeplot(rs, ps, outDir, class_name, iou_type): - """针对某个特定类别,绘制不同评估要求下的准确率和召回率。 - 绘制结果说明参考COCODataset官网给出分析工具说明https://cocodataset.org/#detection-eval。 - - Refer to https://github.com/open-mmlab/mmdetection/blob/master/tools/analysis_tools/coco_error_analysis.py#L13 + """ + 针对某个特定类别,绘制不同评估要求下的准确率和召回率。 + 绘制结果说明参考COCODataset官网给出分析工具说明https://cocodataset.org/#detection-eval。 - Args: - rs (np.array): 在不同置信度阈值下计算得到的召回率。 - ps (np.array): 在不同置信度阈值下计算得到的准确率。ps与rs相同位置下的数值为同一个置信度阈值 - 计算得到的准确率与召回率。 - outDir (str): 图表保存的路径。 - class_name (str): 类别名。 - iou_type (str): iou计算方式,若为检测框,则设置为'bbox',若为像素级分割结果,则设置为'segm'。 + Refer to https://github.com/open-mmlab/mmdetection/blob/master/tools/analysis_tools/coco_error_analysis.py#L13 + Args: + rs (np.array): 在不同置信度阈值下计算得到的召回率。 + ps (np.array): 在不同置信度阈值下计算得到的准确率。ps与rs相同位置下的数值为同一个置信度阈值 + 计算得到的准确率与召回率。 + outDir (str): 图表保存的路径。 + class_name (str): 类别名。 + iou_type (str): iou计算方式,若为检测框,则设置为'bbox',若为像素级分割结果,则设置为'segm'。 """ import matplotlib.pyplot as plt @@ -276,21 +278,22 @@ def makeplot(rs, ps, outDir, class_name, iou_type): def analyze_individual_category(k, cocoDt, cocoGt, catId, iou_type, areas=None): - """针对某个特定类别,分析忽略亚类混淆和类别混淆时的准确率。 + """ + 针对某个特定类别,分析忽略亚类混淆和类别混淆时的准确率。 - Refer to https://github.com/open-mmlab/mmdetection/blob/master/tools/analysis_tools/coco_error_analysis.py#L174 + Refer to https://github.com/open-mmlab/mmdetection/blob/master/tools/analysis_tools/coco_error_analysis.py#L174 - Args: - k (int): 待分析类别的序号。 - cocoDt (pycocotols.coco.COCO): 按COCO类存放的预测结果。 - cocoGt (pycocotols.coco.COCO): 按COCO类存放的真值。 - catId (int): 待分析类别在数据集中的类别id。 - iou_type (str): iou计算方式,若为检测框,则设置为'bbox',若为像素级分割结果,则设置为'segm'。 + Args: + k (int): 待分析类别的序号。 + cocoDt (pycocotols.coco.COCO): 按COCO类存放的预测结果。 + cocoGt (pycocotols.coco.COCO): 按COCO类存放的真值。 + catId (int): 待分析类别在数据集中的类别id。 + iou_type (str): iou计算方式,若为检测框,则设置为'bbox',若为像素级分割结果,则设置为'segm'。 - Returns: - int: - dict: 有关键字'ps_supercategory'和'ps_allcategory'。关键字'ps_supercategory'的键值是忽略亚类间 - 混淆时的准确率,关键字'ps_allcategory'的键值是忽略类别间混淆时的准确率。 + Returns: + int: + dict: 有关键字'ps_supercategory'和'ps_allcategory'。关键字'ps_supercategory'的键值是忽略亚类间 + 混淆时的准确率,关键字'ps_allcategory'的键值是忽略类别间混淆时的准确率。 """ @@ -362,23 +365,23 @@ def coco_error_analysis(eval_details_file=None, pred_bbox=None, pred_mask=None, save_dir='./output'): - """逐个分析模型预测错误的原因,并将分析结果以图表的形式展示。 - 分析结果说明参考COCODataset官网给出分析工具说明https://cocodataset.org/#detection-eval。 - - Refer to https://github.com/open-mmlab/mmdetection/blob/master/tools/analysis_tools/coco_error_analysis.py - - Args: - eval_details_file (str): 模型评估结果的保存路径,包含真值信息和预测结果。 - gt (list): 数据集的真值信息。默认值为None。 - pred_bbox (list): 模型在数据集上的预测框。默认值为None。 - pred_mask (list): 模型在数据集上的预测mask。默认值为None。 - 
save_dir (str): 可视化结果保存路径。默认值为'./output'。 + """ + 逐个分析模型预测错误的原因,并将分析结果以图表的形式展示。 + 分析结果说明参考COCODataset官网给出分析工具说明https://cocodataset.org/#detection-eval。 - Note: - eval_details_file的优先级更高,只要eval_details_file不为None, - 就会从eval_details_file提取真值信息和预测结果做分析。 - 当eval_details_file为None时,则用gt、pred_mask、pred_mask做分析。 + Refer to https://github.com/open-mmlab/mmdetection/blob/master/tools/analysis_tools/coco_error_analysis.py + Args: + eval_details_file (str): 模型评估结果的保存路径,包含真值信息和预测结果。 + gt (list): 数据集的真值信息。默认值为None。 + pred_bbox (list): 模型在数据集上的预测框。默认值为None。 + pred_mask (list): 模型在数据集上的预测mask。默认值为None。 + save_dir (str): 可视化结果保存路径。默认值为'./output'。 + + Note: + eval_details_file的优先级更高,只要eval_details_file不为None, + 就会从eval_details_file提取真值信息和预测结果做分析。 + 当eval_details_file为None时,则用gt、pred_mask、pred_mask做分析。 """ import multiprocessing as mp diff --git a/paddlers/tasks/utils/visualize.py b/paddlers/tasks/utils/visualize.py index fb76f968..2c769954 100644 --- a/paddlers/tasks/utils/visualize.py +++ b/paddlers/tasks/utils/visualize.py @@ -25,7 +25,7 @@ def visualize_detection(image, result, threshold=0.5, save_dir='./', color=None): """ - Visualize bbox and mask results + Visualize bbox and mask results """ if isinstance(image, np.ndarray): @@ -48,6 +48,7 @@ def visualize_segmentation(image, result, weight=0.6, save_dir='./', color=None): """ Convert segment result to color image, and save added image. + Args: image: the path of origin image result: the predict result of image @@ -55,6 +56,7 @@ def visualize_segmentation(image, result, weight=0.6, save_dir='./', save_dir: the directory for saving visual image color: the list of a BGR-mode color for each label. """ + label_map = result['label_map'].astype("uint8") color_map = get_color_map_list(256) if color is not None: @@ -104,13 +106,16 @@ def visualize_segmentation(image, result, weight=0.6, save_dir='./', def get_color_map_list(num_classes): - """ Returns the color map for visualizing the segmentation mask, - which can support arbitrary number of classes. + """ + Returns the color map for visualizing the segmentation mask, which can support arbitrary number of classes. + Args: num_classes: Number of classes + Returns: The color map """ + color_map = num_classes * [0, 0, 0] for i in range(0, num_classes): j = 0 diff --git a/paddlers/tools/yolo_cluster.py b/paddlers/tools/yolo_cluster.py index 9efbdcf0..0d7941eb 100644 --- a/paddlers/tools/yolo_cluster.py +++ b/paddlers/tools/yolo_cluster.py @@ -28,6 +28,7 @@ class BaseAnchorCluster(object): def __init__(self, num_anchors, cache, cache_path): """ Base Anchor Cluster + Args: num_anchors (int): number of clusters cache (bool): whether using cache diff --git a/paddlers/transforms/__init__.py b/paddlers/transforms/__init__.py index 0c10e7de..c5ad12e8 100644 --- a/paddlers/transforms/__init__.py +++ b/paddlers/transforms/__init__.py @@ -12,11 +12,45 @@ # See the License for the specific language governing permissions and # limitations under the License. +import copy +import os.path as osp + from .operators import * -from .batch_operators import BatchRandomResize, BatchRandomResizeByShort, _BatchPadding +from .batch_operators import BatchRandomResize, BatchRandomResizeByShort, _BatchPad from paddlers import transforms as T +def decode_image(im_path, + to_rgb=True, + to_uint8=True, + decode_bgr=True, + decode_sar=True): + """ + Decode an image. + + Args: + to_rgb (bool, optional): If True, convert input image(s) from BGR format to RGB format. Defaults to True. 
+ to_uint8 (bool, optional): If True, quantize and convert decoded image(s) to uint8 type. Defaults to True. + decode_bgr (bool, optional): If True, automatically interpret a non-geo image (e.g. jpeg images) as a BGR image. + Defaults to True. + decode_sar (bool, optional): If True, automatically interpret a two-channel geo image (e.g. geotiff images) as a + SAR image. Defaults to True. + """ + + if not osp.exists(im_path): + raise ValueError(f"{im_path} does not exist!") + decoder = T.DecodeImg( + to_rgb=to_rgb, + to_uint8=to_uint8, + decode_bgr=decode_bgr, + decode_sar=decode_sar) + # Deepcopy to avoid inplace modification + sample = {'image': copy.deepcopy(im_path)} + sample = decoder(sample) + return sample['image'] + + def arrange_transforms(model_type, transforms, mode='train'): # 给transforms添加arrange操作 if model_type == 'segmenter': diff --git a/paddlers/transforms/batch_operators.py b/paddlers/transforms/batch_operators.py index d782931d..c2496c58 100644 --- a/paddlers/transforms/batch_operators.py +++ b/paddlers/transforms/batch_operators.py @@ -74,7 +74,7 @@ class BatchRandomResize(Transform): Attention: If interp is 'RANDOM', the interpolation method will be chose randomly. Args: - target_sizes (List[int], List[list or tuple] or Tuple[list or tuple]): + target_sizes (list[int] | list[list | tuple] | tuple[list | tuple]): Multiple target sizes, each target size is an int or list/tuple of length 2. interp ({'NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM'}, optional): Interpolation method of resize. Defaults to 'LINEAR'. @@ -93,7 +93,7 @@ def __init__(self, target_sizes, interp='NEAREST'): interp_dict.keys())) self.interp = interp assert isinstance(target_sizes, list), \ - "target_size must be List" + "target_size must be a list." for i, item in enumerate(target_sizes): if isinstance(item, int): target_sizes[i] = (item, item) @@ -113,7 +113,7 @@ class BatchRandomResizeByShort(Transform): Attention: If interp is 'RANDOM', the interpolation method will be chose randomly. Args: - short_sizes (List[int], Tuple[int]): Target sizes of the shorter side of the image(s). + short_sizes (list[int] | tuple[int]): Target sizes of the shorter side of the image(s). max_size (int, optional): The upper bound of longer side of the image(s). If max_size is -1, no upper bound is applied. Defaults to -1. interp ({'NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM'}, optional): @@ -134,7 +134,7 @@ def __init__(self, short_sizes, max_size=-1, interp='NEAREST'): interp_dict.keys())) self.interp = interp assert isinstance(short_sizes, list), \ - "short_sizes must be List" + "short_sizes must be a list." self.short_sizes = short_sizes self.max_size = max_size @@ -149,9 +149,9 @@ def __call__(self, samples): return samples -class _BatchPadding(Transform): +class _BatchPad(Transform): def __init__(self, pad_to_stride=0): - super(_BatchPadding, self).__init__() + super(_BatchPad, self).__init__() self.pad_to_stride = pad_to_stride def __call__(self, samples): diff --git a/paddlers/transforms/functions.py b/paddlers/transforms/functions.py index 11200bac..68c59a67 100644 --- a/paddlers/transforms/functions.py +++ b/paddlers/transforms/functions.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License.
-import cv2 import copy +import cv2 import numpy as np import shapely.ops from shapely.geometry import Polygon, MultiPolygon, GeometryCollection @@ -126,45 +126,52 @@ def img_flip(im, method=0): """ if not len(im.shape) >= 2: raise ValueError("Shape of image should 2d, 3d or more") - if method==0 or method=='h': + if method == 0 or method == 'h': return horizontal_flip(im) - elif method==1 or method=='v': + elif method == 1 or method == 'v': return vertical_flip(im) - elif method==2 or method=='hv': + elif method == 2 or method == 'hv': return hv_flip(im) - elif method==3 or method=='rt2lb' or method=='dia': + elif method == 3 or method == 'rt2lb' or method == 'dia': return rt2lb_flip(im) - elif method==4 or method=='lt2rb' or method=='adia': + elif method == 4 or method == 'lt2rb' or method == 'adia': return lt2rb_flip(im) else: return im + def horizontal_flip(im): im = im[:, ::-1, ...] return im + def vertical_flip(im): im = im[::-1, :, ...] return im + def hv_flip(im): im = im[::-1, ::-1, ...] return im + def rt2lb_flip(im): axs_list = list(range(len(im.shape))) axs_list[:2] = [1, 0] im = im.transpose(axs_list) return im + def lt2rb_flip(im): axs_list = list(range(len(im.shape))) axs_list[:2] = [1, 0] im = im[::-1, ::-1, ...].transpose(axs_list) return im + # endregion + # region rotation def img_simple_rotate(im, method=0): """ @@ -223,30 +230,35 @@ """ if not len(im.shape) >= 2: raise ValueError("Shape of image should 2d, 3d or more") - if method==0 or method==90: + if method == 0 or method == 90: return rot_90(im) - elif method==1 or method==180: + elif method == 1 or method == 180: return rot_180(im) - elif method==2 or method==270: + elif method == 2 or method == 270: return rot_270(im) else: return im + def rot_90(im): axs_list = list(range(len(im.shape))) axs_list[:2] = [1, 0] im = im[::-1, :, ...].transpose(axs_list) return im + def rot_180(im): im = im[::-1, ::-1, ...] return im + def rot_270(im): axs_list = list(range(len(im.shape))) axs_list[:2] = [1, 0] im = im[:, ::-1, ...].transpose(axs_list) return im + + # endregion @@ -477,15 +489,16 @@ def select_bands(im, band_list=[1, 2, 3]): return ima -def de_haze(im, gamma=False): - """ Priori defogging of dark channel. (Just RGB) +def dehaze(im, gamma=False): + """ + Single image haze removal using dark channel prior. Args: - im (np.ndarray): The image. + im (np.ndarray): Input image. gamma (bool, optional): Use gamma correction or not. Defaults to False. Returns: - np.ndarray: The image after defogged. + np.ndarray: The dehazed image. """ def _guided_filter(I, p, r, eps): @@ -501,7 +514,7 @@ m_b = cv2.boxFilter(b, -1, (r, r)) return m_a * I + m_b - def _de_fog(im, r, w, maxatmo_mask, eps): + def _dehaze(im, r, w, maxatmo_mask, eps): # im is RGB and range[0, 1] atmo_mask = np.min(im, 2) dark_channel = cv2.erode(atmo_mask, np.ones((15, 15))) @@ -519,7 +532,7 @@ if np.max(im) > 1: im = im / 255. result = np.zeros(im.shape) - mask_img, atmo_illum = _de_fog( + mask_img, atmo_illum = _dehaze( im, r=81, w=0.95, maxatmo_mask=0.80, eps=1e-8) for k in range(3): result[:, :, k] = (im[:, :, k] - mask_img) / (1 - mask_img / atmo_illum) @@ -534,11 +547,11 @@ def match_histograms(im, ref): Match the cumulative histogram of one image to another. Args: - im (np.ndarray): The input image. - ref (np.ndarray): The reference image to match histogram of. `ref` must have the same number of channels as `im`.
+ im (np.ndarray): Input image. + ref (np.ndarray): Reference image to match histogram of. `ref` must have the same number of channels as `im`. Returns: - np.ndarray: The transformed input image. + np.ndarray: Transformed input image. Raises: ValueError: When the number of channels of `ref` differs from that of im`. @@ -553,14 +566,14 @@ def match_by_regression(im, ref, pif_loc=None): Match the brightness values of two images using a linear regression method. Args: - im (np.ndarray): The input image. - ref (np.ndarray): The reference image to match. `ref` must have the same shape as `im`. - pif_loc (tuple|None, optional): The spatial locations where pseudo-invariant features (PIFs) are obtained. If + im (np.ndarray): Input image. + ref (np.ndarray): Reference image to match. `ref` must have the same shape as `im`. + pif_loc (tuple|None, optional): Spatial locations where pseudo-invariant features (PIFs) are obtained. If `pif_loc` is set to None, all pixels in the image will be used as training samples for the regression model. In other cases, `pif_loc` should be a tuple of np.ndarrays. Default: None. Returns: - np.ndarray: The transformed input image. + np.ndarray: Transformed input image. Raises: ValueError: When the shape of `ref` differs from that of `im`. diff --git a/paddlers/transforms/operators.py b/paddlers/transforms/operators.py index fad74a43..d2091aca 100644 --- a/paddlers/transforms/operators.py +++ b/paddlers/transforms/operators.py @@ -32,12 +32,12 @@ import paddlers from .functions import normalize, horizontal_flip, permute, vertical_flip, center_crop, is_poly, \ horizontal_flip_poly, horizontal_flip_rle, vertical_flip_poly, vertical_flip_rle, crop_poly, \ - crop_rle, expand_poly, expand_rle, resize_poly, resize_rle, de_haze, select_bands, \ + crop_rle, expand_poly, expand_rle, resize_poly, resize_rle, dehaze, select_bands, \ to_intensity, to_uint8, img_flip, img_simple_rotate __all__ = [ "Compose", - "ImgDecoder", + "DecodeImg", "Resize", "RandomResize", "ResizeByShort", @@ -50,19 +50,19 @@ "RandomCrop", "RandomScaleAspect", "RandomExpand", - "Padding", + "Pad", "MixupImage", "RandomDistort", "RandomBlur", "RandomSwap", - "Defogging", - "DimReducing", - "BandSelecting", + "Dehaze", + "ReduceDim", + "SelectBand", "ArrangeSegmenter", "ArrangeChangeDetector", "ArrangeClassifier", "ArrangeDetector", - "RandomFlipOrRotation", + "RandomFlipOrRotate", ] interp_dict = { @@ -119,19 +119,31 @@ def __call__(self, sample): return sample -class ImgDecoder(Transform): +class DecodeImg(Transform): """ Decode image(s) in input. + Args: - to_rgb (bool, optional): If True, convert input images from BGR format to RGB format. Defaults to True. + to_rgb (bool, optional): If True, convert input image(s) from BGR format to RGB format. Defaults to True. + to_uint8 (bool, optional): If True, quantize and convert decoded image(s) to uint8 type. Defaults to True. + decode_bgr (bool, optional): If True, automatically interpret a non-geo image (e.g., jpeg images) as a BGR image. + Defaults to True. + decode_sar (bool, optional): If True, automatically interpret a two-channel geo image (e.g. geotiff images) as a + SAR image. Defaults to True.
""" - def __init__(self, to_rgb=True, to_uint8=True): - super(ImgDecoder, self).__init__() + def __init__(self, + to_rgb=True, + to_uint8=True, + decode_bgr=True, + decode_sar=True): + super(DecodeImg, self).__init__() self.to_rgb = to_rgb self.to_uint8 = to_uint8 + self.decode_bgr = decode_bgr + self.decode_sar = decode_sar - def read_img(self, img_path, input_channel=3): + def read_img(self, img_path): img_format = imghdr.what(img_path) name, ext = os.path.splitext(img_path) if img_format == 'tiff' or ext == '.img': @@ -140,24 +152,24 @@ def read_img(self, img_path, input_channel=3): except: try: from osgeo import gdal - except: - raise Exception( - "Failed to import gdal! You can try use conda to install gdal" + except ImportError: + raise ImportError( + "Failed to import gdal! Please install GDAL library according to the document." ) - six.reraise(*sys.exc_info()) dataset = gdal.Open(img_path) if dataset == None: - raise Exception('Can not open', img_path) + raise IOError('Can not open', img_path) im_data = dataset.ReadAsArray() - if im_data.ndim == 2: + if im_data.ndim == 2 and self.decode_sar: im_data = to_intensity(im_data) # is read SAR im_data = im_data[:, :, np.newaxis] - elif im_data.ndim == 3: - im_data = im_data.transpose((1, 2, 0)) + else: + if im_data.ndim == 3: + im_data = im_data.transpose((1, 2, 0)) return im_data elif img_format in ['jpeg', 'bmp', 'png', 'jpg']: - if input_channel == 3: + if self.decode_bgr: return cv2.imread(img_path, cv2.IMREAD_ANYDEPTH | cv2.IMREAD_ANYCOLOR | cv2.IMREAD_COLOR) else: @@ -166,7 +178,7 @@ def read_img(self, img_path, input_channel=3): elif ext == '.npy': return np.load(img_path) else: - raise Exception('Image format {} is not supported!'.format(ext)) + raise TypeError('Image format {} is not supported!'.format(ext)) def apply_im(self, im_path): if isinstance(im_path, str): @@ -192,7 +204,7 @@ def apply_mask(self, mask): except: raise ValueError("Cannot read the mask file {}!".format(mask)) if len(mask.shape) != 2: - raise Exception( + raise ValueError( "Mask should be a 1-channel image, but recevied is a {}-channel image.". format(mask.shape[2])) return mask @@ -201,6 +213,7 @@ def apply(self, sample): """ Args: sample (dict): Input sample. + Returns: dict: Decoded sample. """ @@ -218,8 +231,8 @@ def apply(self, sample): im_height, im_width, _ = sample['image'].shape se_height, se_width = sample['mask'].shape if im_height != se_height or im_width != se_width: - raise Exception( - "The height or width of the im is not same as the mask") + raise ValueError( + "The height or width of the image is not same as the mask.") if 'aux_masks' in sample: sample['aux_masks'] = list( map(self.apply_mask, sample['aux_masks'])) @@ -237,7 +250,7 @@ class Compose(Transform): All input images are in Height-Width-Channel ([H, W, C]) format. Args: - transforms (List[paddlers.transforms.Transform]): List of data preprocess or augmentations. + transforms (list[paddlers.transforms.Transform]): List of data preprocess or augmentations. Raises: TypeError: Invalid type of transforms. ValueError: Invalid length of transforms. @@ -247,14 +260,14 @@ def __init__(self, transforms, to_uint8=True): super(Compose, self).__init__() if not isinstance(transforms, list): raise TypeError( - 'Type of transforms is invalid. Must be List, but received is {}' + 'Type of transforms is invalid. 
Must be a list, but received is {}' .format(type(transforms))) if len(transforms) < 1: raise ValueError( 'Length of transforms must not be less than 1, but received is {}' .format(len(transforms))) self.transforms = transforms - self.decode_image = ImgDecoder(to_uint8=to_uint8) + self.decode_image = DecodeImg(to_uint8=to_uint8) self.arrange_outputs = None self.apply_im_only = False @@ -295,7 +308,7 @@ class Resize(Transform): Attention: If interp is 'RANDOM', the interpolation method will be chose randomly. Args: - target_size (int, List[int] or Tuple[int]): Target size. If int, the height and width share the same target_size. + target_size (int, list[int] | tuple[int]): Target size. If int, the height and width share the same target_size. Otherwise, target_size represents [target height, target width]. interp ({'NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM'}, optional): Interpolation method of resize. Defaults to 'LINEAR'. @@ -414,7 +427,7 @@ class RandomResize(Transform): Attention: If interp is 'RANDOM', the interpolation method will be chose randomly. Args: - target_sizes (List[int], List[list or tuple] or Tuple[list or tuple]): + target_sizes (list[int] | list[list | tuple] | tuple[list | tuple]): Multiple target sizes, each target size is an int or list/tuple. interp ({'NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM'}, optional): Interpolation method of resize. Defaults to 'LINEAR'. @@ -434,7 +447,7 @@ def __init__(self, target_sizes, interp='LINEAR'): interp_dict.keys())) self.interp = interp assert isinstance(target_sizes, list), \ - "target_size must be List" + "target_size must be a list." for i, item in enumerate(target_sizes): if isinstance(item, int): target_sizes[i] = (item, item) @@ -494,7 +507,7 @@ class RandomResizeByShort(Transform): Attention: If interp is 'RANDOM', the interpolation method will be chose randomly. Args: - short_sizes (List[int]): Target size of the shorter side of the image(s). + short_sizes (list[int]): Target size of the shorter side of the image(s). max_size (int, optional): The upper bound of longer side of the image(s). If max_size is -1, no upper bound is applied. Defaults to -1. interp ({'NEAREST', 'LINEAR', 'CUBIC', 'AREA', 'LANCZOS4', 'RANDOM'}, optional): Interpolation method of resize. Defaults to 'LINEAR'. @@ -513,7 +526,7 @@ def __init__(self, short_sizes, max_size=-1, interp='LINEAR'): interp_dict.keys())) self.interp = interp assert isinstance(short_sizes, list), \ - "short_sizes must be List" + "short_sizes must be a list." self.short_sizes = short_sizes self.max_size = max_size @@ -544,7 +557,7 @@ def apply(self, sample): return sample -class RandomFlipOrRotation(Transform): +class RandomFlipOrRotate(Transform): """ Flip or Rotate an image in different ways with a certain probability. 
@@ -561,7 +574,7 @@ class RandomFlipOrRotation(Transform): # 定义数据增强 train_transforms = T.Compose([ - T.RandomFlipOrRotation( + T.RandomFlipOrRotate( probs = [0.3, 0.2] # 进行flip增强的概率是0.3,进行rotate增强的概率是0.2,不变的概率是0.5 probsf = [0.3, 0.25, 0, 0, 0] # flip增强时,使用水平flip、垂直flip的概率分别是0.3、0.25,水平且垂直flip、对角线flip、反对角线flip概率均为0,不变的概率是0.45 probsr = [0, 0.65, 0]), # rotate增强时,顺时针旋转90度的概率是0,顺时针旋转180度的概率是0.65,顺时针旋转90度的概率是0,不变的概率是0.35 @@ -574,7 +587,7 @@ def __init__(self, probs=[0.35, 0.25], probsf=[0.3, 0.3, 0.2, 0.1, 0.1], probsr=[0.25, 0.5, 0.25]): - super(RandomFlipOrRotation, self).__init__() + super(RandomFlipOrRotate, self).__init__() # Change various probabilities into probability intervals, to judge in which mode to flip or rotate self.probs = [probs[0], probs[0] + probs[1]] self.probsf = self.get_probs_range(probsf) @@ -594,6 +607,16 @@ def apply_mask(self, mask, mode_id, flip_mode=True): mask = img_simple_rotate(mask, mode_id) return mask + def apply_bbox(self, bbox, mode_id, flip_mode=True): + raise TypeError( + "Currently, `paddlers.transforms.RandomFlipOrRotate` is not available for object detection tasks." + ) + + def apply_segm(self, bbox, mode_id, flip_mode=True): + raise TypeError( + "Currently, `paddlers.transforms.RandomFlipOrRotate` is not available for object detection tasks." + ) + def get_probs_range(self, probs): ''' Change various probabilities into cumulative probabilities @@ -637,14 +660,43 @@ def apply(self, sample): mode_p = random.random() mode_id = self.judge_probs_range(mode_p, self.probsf) sample['image'] = self.apply_im(sample['image'], mode_id, True) + if 'image2' in sample: + sample['image2'] = self.apply_im(sample['image2'], mode_id, + True) if 'mask' in sample: sample['mask'] = self.apply_mask(sample['mask'], mode_id, True) + if 'aux_masks' in sample: + sample['aux_masks'] = [ + self.apply_mask(aux_mask, mode_id, True) + for aux_mask in sample['aux_masks'] + ] + if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0: + sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], mode_id, + True) + if 'gt_poly' in sample and len(sample['gt_poly']) > 0: + sample['gt_poly'] = self.apply_segm(sample['gt_poly'], mode_id, + True) elif p_m < self.probs[1]: mode_p = random.random() mode_id = self.judge_probs_range(mode_p, self.probsr) sample['image'] = self.apply_im(sample['image'], mode_id, False) + if 'image2' in sample: + sample['image2'] = self.apply_im(sample['image2'], mode_id, + False) if 'mask' in sample: sample['mask'] = self.apply_mask(sample['mask'], mode_id, False) + if 'aux_masks' in sample: + sample['aux_masks'] = [ + self.apply_mask(aux_mask, mode_id, False) + for aux_mask in sample['aux_masks'] + ] + if 'gt_bbox' in sample and len(sample['gt_bbox']) > 0: + sample['gt_bbox'] = self.apply_bbox(sample['gt_bbox'], mode_id, + False) + if 'gt_poly' in sample and len(sample['gt_poly']) > 0: + sample['gt_poly'] = self.apply_segm(sample['gt_poly'], mode_id, + False) + return sample @@ -766,16 +818,16 @@ def apply(self, sample): class Normalize(Transform): """ - Apply min-max normalization to the image(s) in input. + Apply normalization to the input image(s). The normalization steps are: 1. im = (im - min_value) * 1 / (max_value - min_value) 2. im = im - mean 3. im = im / std Args: - mean(List[float] or Tuple[float], optional): Mean of input image(s). Defaults to [0.485, 0.456, 0.406]. - std(List[float] or Tuple[float], optional): Standard deviation of input image(s). Defaults to [0.229, 0.224, 0.225]. 
- min_val(List[float] or Tuple[float], optional): Minimum value of input image(s). Defaults to [0, 0, 0, ]. - max_val(List[float] or Tuple[float], optional): Max value of input image(s). Defaults to [255., 255., 255.]. + mean(list[float] | tuple[float], optional): Mean of input image(s). Defaults to [0.485, 0.456, 0.406]. + std(list[float] | tuple[float], optional): Standard deviation of input image(s). Defaults to [0.229, 0.224, 0.225]. + min_val(list[float] | tuple[float], optional): Minimum value of input image(s). Defaults to [0, 0, 0, ]. + max_val(list[float] | tuple[float], optional): Max value of input image(s). Defaults to [255., 255., 255.]. """ def __init__(self, @@ -865,12 +917,12 @@ class RandomCrop(Transform): 4. Resize the cropped area to crop_size by crop_size. Args: - crop_size(int, List[int] or Tuple[int]): Target size of the cropped area. If None, the cropped area will not be + crop_size(int, list[int] | tuple[int]): Target size of the cropped area. If None, the cropped area will not be resized. Defaults to None. - aspect_ratio (List[float], optional): Aspect ratio of cropped region in [min, max] format. Defaults to [.5, 2.]. - thresholds (List[float], optional): Iou thresholds to decide a valid bbox crop. + aspect_ratio (list[float], optional): Aspect ratio of cropped region in [min, max] format. Defaults to [.5, 2.]. + thresholds (list[float], optional): Iou thresholds to decide a valid bbox crop. Defaults to [.0, .1, .3, .5, .7, .9]. - scaling (List[float], optional): Ratio between the cropped region and the original image in [min, max] format. + scaling (list[float], optional): Ratio between the cropped region and the original image in [min, max] format. Defaults to [.3, 1.]. num_attempts (int, optional): The number of tries before giving up. Defaults to 50. allow_no_crop (bool, optional): Whether returning without doing crop is allowed. Defaults to True. @@ -1088,11 +1140,11 @@ class RandomExpand(Transform): Args: upper_ratio(float, optional): The maximum ratio to which the original image is expanded. Defaults to 4.. prob(float, optional): The probability of apply expanding. Defaults to .5. - im_padding_value(List[float] or Tuple[float], optional): RGB filling value for the image. Defaults to (127.5, 127.5, 127.5). + im_padding_value(list[float] | tuple[float], optional): RGB filling value for the image. Defaults to (127.5, 127.5, 127.5). label_padding_value(int, optional): Filling value for the mask. Defaults to 255. See Also: - paddlers.transforms.Padding + paddlers.transforms.Pad """ def __init__(self, @@ -1120,7 +1172,7 @@ def apply(self, sample): x = np.random.randint(0, w - im_w) target_size = (h, w) offsets = (x, y) - sample = Padding( + sample = Pad( target_size=target_size, pad_mode=-1, offsets=offsets, @@ -1129,7 +1181,7 @@ def apply(self, sample): return sample -class Padding(Transform): +class Pad(Transform): def __init__(self, target_size=None, pad_mode=0, @@ -1148,7 +1200,7 @@ def __init__(self, label_padding_value(int, optional): Filling value for the mask. Defaults to 255. size_divisor(int): Image width and height after padding is a multiple of coarsest_stride. """ - super(Padding, self).__init__() + super(Pad, self).__init__() if isinstance(target_size, (list, tuple)): if len(target_size) != 2: raise ValueError( @@ -1525,20 +1577,20 @@ def apply(self, sample): return sample -class Defogging(Transform): +class Dehaze(Transform): """ - Defog input image(s). + Dehaze input image(s). Args: gamma (bool, optional): Use gamma correction or not. 
Defaults to False. """ def __init__(self, gamma=False): - super(Defogging, self).__init__() + super(Dehaze, self).__init__() self.gamma = gamma def apply_im(self, image): - image = de_haze(image, self.gamma) + image = dehaze(image, self.gamma) return image def apply(self, sample): @@ -1548,19 +1600,20 @@ def apply(self, sample): return sample -class DimReducing(Transform): +class ReduceDim(Transform): """ - Use PCA to reduce input image(s) dimension. + Use PCA to reduce the dimension of input image(s). Args: - joblib_path (str): Path of *.joblib about PCA. + joblib_path (str): Path of *.joblib file of PCA. """ def __init__(self, joblib_path): - super(DimReducing, self).__init__() + super(ReduceDim, self).__init__() ext = joblib_path.split(".")[-1] if ext != "joblib": - raise ValueError("`joblib_path` must be *.joblib, not *.{}.".format(ext)) + raise ValueError("`joblib_path` must be *.joblib, not *.{}.".format( + ext)) self.pca = load(joblib_path) def apply_im(self, image): @@ -1577,16 +1630,16 @@ def apply(self, sample): return sample -class BandSelecting(Transform): +class SelectBand(Transform): """ - Select the band of the input image(s). + Select a set of bands of input image(s). Args: - band_list (list, optional): Bands of selected (Start with 1). Defaults to [1, 2, 3]. + band_list (list, optional): Bands to select (the band index starts with 1). Defaults to [1, 2, 3]. """ def __init__(self, band_list=[1, 2, 3]): - super(BandSelecting, self).__init__() + super(SelectBand, self).__init__() self.band_list = band_list def apply_im(self, image): diff --git a/paddlers/utils/__init__.py b/paddlers/utils/__init__.py index 832793d1..842e5331 100644 --- a/paddlers/utils/__init__.py +++ b/paddlers/utils/__init__.py @@ -21,4 +21,4 @@ from .env import get_environ_info, get_num_workers, init_parallel_env from .download import download_and_decompress, decompress from .stats import SmoothedValue, TrainingStats -from .shm import _get_shared_memory_size_in_M \ No newline at end of file +from .shm import _get_shared_memory_size_in_M diff --git a/paddlers/utils/stats.py b/paddlers/utils/stats.py index 7b4f09a7..447753fa 100644 --- a/paddlers/utils/stats.py +++ b/paddlers/utils/stats.py @@ -17,6 +17,7 @@ import numpy as np + class SmoothedValue(object): """Track a series of values and provide access to smoothed values over a window. diff --git a/requirements.txt b/requirements.txt index 1cd4bfde..706bb439 100644 --- a/requirements.txt +++ b/requirements.txt @@ -18,5 +18,6 @@ munch natsort geojson colorama +filelock # # Self installation # GDAL >= 3.1.3 diff --git a/tests/check_coverage.sh b/tests/check_coverage.sh new file mode 100644 index 00000000..7d476c91 --- /dev/null +++ b/tests/check_coverage.sh @@ -0,0 +1,6 @@ +#!/usr/bin/env bash + +bash download_test_data.sh +coverage run --source paddlers,$(ls -d ../tools/* | tr '\n' ',') --omit=../paddlers/models/* -m unittest discover -v +coverage report +coverage html -d coverage_html \ No newline at end of file diff --git a/tests/components/__init__.py b/tests/components/__init__.py new file mode 100644 index 00000000..29c8b7d1 --- /dev/null +++ b/tests/components/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/data/.gitignore b/tests/data/.gitignore new file mode 100644 index 00000000..bd590fd8 --- /dev/null +++ b/tests/data/.gitignore @@ -0,0 +1,2 @@ +*.tar.gz +*/ \ No newline at end of file diff --git a/tests/data/README.md b/tests/data/README.md new file mode 100644 index 00000000..94ce141c --- /dev/null +++ b/tests/data/README.md @@ -0,0 +1,5 @@ +# Testing Data + +This directory stores real samples that can be used for testing. + +*ssmt* means single-source-multi-temporal and *ssst* means single-source-single-temporal. diff --git a/tests/data/__init__.py b/tests/data/__init__.py new file mode 100644 index 00000000..c9b33527 --- /dev/null +++ b/tests/data/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .data_utils import * diff --git a/tests/data/data_utils.py b/tests/data/data_utils.py new file mode 100644 index 00000000..b0d421ba --- /dev/null +++ b/tests/data/data_utils.py @@ -0,0 +1,378 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
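+
+ # The sample dicts built in this module feed paddlers.transforms pipelines
+ # directly. A minimal usage sketch (the file list path and band indices are
+ # placeholders, not files shipped with this patch):
+ #
+ #   import paddlers.transforms as T
+ #   pipeline = T.Compose([
+ #       T.DecodeImg(),
+ #       T.SelectBand([1, 2, 3]),  # band indices start at 1
+ #       T.Normalize(
+ #           mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
+ #   ])
+ #   samples = build_input_from_file(
+ #       'data/ssst/test_seg.txt', prefix='./data/ssst', task='seg')
+ #   out = pipeline(samples[0])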
+ +import os.path as osp +import re +import imghdr +import platform +from collections import OrderedDict +from functools import partial, wraps + +import numpy as np + +__all__ = ['build_input_from_file'] + + +def norm_path(path): + win_sep = "\\" + other_sep = "/" + if platform.system() == "Windows": + path = win_sep.join(path.split(other_sep)) + else: + path = other_sep.join(path.split(win_sep)) + return path + + +def is_pic(im_path): + valid_suffix = [ + 'JPEG', 'jpeg', 'JPG', 'jpg', 'BMP', 'bmp', 'PNG', 'png', 'npy' + ] + suffix = im_path.split('.')[-1] + if suffix in valid_suffix: + return True + im_format = imghdr.what(im_path) + _, ext = osp.splitext(im_path) + if im_format == 'tiff' or ext == '.img': + return True + return False + + +def get_full_path(p, prefix=''): + p = norm_path(p) + return osp.join(prefix, p) + + +def silent(func): + def _do_nothing(*args, **kwargs): + pass + + @wraps(func) + def _wrapper(*args, **kwargs): + import builtins + print = builtins.print + builtins.print = _do_nothing + ret = func(*args, **kwargs) + builtins.print = print + return ret + + return _wrapper + + +class ConstrSample(object): + def __init__(self, prefix, label_list): + super().__init__() + self.prefix = prefix + self.label_list_obj = self.read_label_list(label_list) + self.get_full_path = partial(get_full_path, prefix=self.prefix) + + def read_label_list(self, label_list): + if label_list is None: + return None + cname2cid = OrderedDict() + label_id = 0 + with open(label_list, 'r') as f: + for line in f: + cname2cid[line.strip()] = label_id + label_id += 1 + return cname2cid + + def __call__(self, *parts): + raise NotImplementedError + + +class ConstrSegSample(ConstrSample): + def __call__(self, im_path, mask_path): + return { + 'image': self.get_full_path(im_path), + 'mask': self.get_full_path(mask_path) + } + + +class ConstrCdSample(ConstrSample): + def __call__(self, im1_path, im2_path, mask_path, *aux_mask_paths): + sample = { + 'image_t1': self.get_full_path(im1_path), + 'image_t2': self.get_full_path(im2_path), + 'mask': self.get_full_path(mask_path) + } + if len(aux_mask_paths) > 0: + sample['aux_masks'] = [ + self.get_full_path(p) for p in aux_mask_paths + ] + return sample + + +class ConstrClasSample(ConstrSample): + def __call__(self, im_path, label): + return {'image': self.get_full_path(im_path), 'label': int(label)} + + +class ConstrDetSample(ConstrSample): + def __init__(self, prefix, label_list): + super().__init__(prefix, label_list) + self.ct = 0 + + def __call__(self, im_path, ann_path): + im_path = self.get_full_path(im_path) + ann_path = self.get_full_path(ann_path) + # TODO: Precisely recognize the annotation format + if ann_path.endswith('.json'): + im_dir = im_path + return self._parse_coco_files(im_dir, ann_path) + elif ann_path.endswith('.xml'): + return self._parse_voc_files(im_path, ann_path) + else: + raise ValueError("Cannot recognize the annotation format") + + def _parse_voc_files(self, im_path, ann_path): + import xml.etree.ElementTree as ET + + cname2cid = self.label_list_obj + tree = ET.parse(ann_path) + # The xml file must contain id. 
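+ # If no id tag is present, a running counter (self.ct) supplies the image
+ # id. Tag names are then located case-insensitively by searching patterns
+ # such as '<size>' in the serialized XML, so files written as '<Size>' or
+ # '<SIZE>' are parsed as well.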
+ if tree.find('id') is None:
+ im_id = np.asarray([self.ct])
+ else:
+ self.ct = int(tree.find('id').text)
+ im_id = np.asarray([int(tree.find('id').text)])
+ pattern = re.compile('<size>', re.IGNORECASE)
+ size_tag = pattern.findall(str(ET.tostringlist(tree.getroot())))
+ if len(size_tag) > 0:
+ size_tag = size_tag[0][1:-1]
+ size_element = tree.find(size_tag)
+ pattern = re.compile('<width>', re.IGNORECASE)
+ width_tag = pattern.findall(str(ET.tostringlist(size_element)))[0][
+ 1:-1]
+ im_w = float(size_element.find(width_tag).text)
+ pattern = re.compile('<height>', re.IGNORECASE)
+ height_tag = pattern.findall(str(ET.tostringlist(size_element)))[0][
+ 1:-1]
+ im_h = float(size_element.find(height_tag).text)
+ else:
+ im_w = 0
+ im_h = 0
+
+ pattern = re.compile('<object>', re.IGNORECASE)
+ obj_match = pattern.findall(str(ET.tostringlist(tree.getroot())))
+ if len(obj_match) > 0:
+ obj_tag = obj_match[0][1:-1]
+ objs = tree.findall(obj_tag)
+ else:
+ objs = list()
+
+ num_bbox, i = len(objs), 0
+ gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
+ gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
+ gt_score = np.zeros((num_bbox, 1), dtype=np.float32)
+ is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
+ difficult = np.zeros((num_bbox, 1), dtype=np.int32)
+ for obj in objs:
+ pattern = re.compile('<name>', re.IGNORECASE)
+ name_tag = pattern.findall(str(ET.tostringlist(obj)))[0][1:-1]
+ cname = obj.find(name_tag).text.strip()
+ pattern = re.compile('<difficult>', re.IGNORECASE)
+ diff_tag = pattern.findall(str(ET.tostringlist(obj)))
+ if len(diff_tag) == 0:
+ _difficult = 0
+ else:
+ diff_tag = diff_tag[0][1:-1]
+ try:
+ _difficult = int(obj.find(diff_tag).text)
+ except Exception:
+ _difficult = 0
+ pattern = re.compile('<bndbox>', re.IGNORECASE)
+ box_tag = pattern.findall(str(ET.tostringlist(obj)))
+ if len(box_tag) == 0:
+ continue
+ box_tag = box_tag[0][1:-1]
+ box_element = obj.find(box_tag)
+ pattern = re.compile('<xmin>', re.IGNORECASE)
+ xmin_tag = pattern.findall(str(ET.tostringlist(box_element)))[0][1:
+ -1]
+ x1 = float(box_element.find(xmin_tag).text)
+ pattern = re.compile('<ymin>', re.IGNORECASE)
+ ymin_tag = pattern.findall(str(ET.tostringlist(box_element)))[0][1:
+ -1]
+ y1 = float(box_element.find(ymin_tag).text)
+ pattern = re.compile('<xmax>', re.IGNORECASE)
+ xmax_tag = pattern.findall(str(ET.tostringlist(box_element)))[0][1:
+ -1]
+ x2 = float(box_element.find(xmax_tag).text)
+ pattern = re.compile('<ymax>', re.IGNORECASE)
+ ymax_tag = pattern.findall(str(ET.tostringlist(box_element)))[0][1:
+ -1]
+ y2 = float(box_element.find(ymax_tag).text)
+ x1 = max(0, x1)
+ y1 = max(0, y1)
+ if im_w > 0.5 and im_h > 0.5:
+ x2 = min(im_w - 1, x2)
+ y2 = min(im_h - 1, y2)
+
+ if not (x2 >= x1 and y2 >= y1):
+ continue
+
+ gt_bbox[i, :] = [x1, y1, x2, y2]
+ gt_class[i, 0] = cname2cid[cname]
+ gt_score[i, 0] = 1.
+ is_crowd[i, 0] = 0 + difficult[i, 0] = _difficult + i += 1 + + gt_bbox = gt_bbox[:i, :] + gt_class = gt_class[:i, :] + gt_score = gt_score[:i, :] + is_crowd = is_crowd[:i, :] + difficult = difficult[:i, :] + + im_info = { + 'im_id': im_id, + 'image_shape': np.array( + [im_h, im_w], dtype=np.int32) + } + label_info = { + 'is_crowd': is_crowd, + 'gt_class': gt_class, + 'gt_bbox': gt_bbox, + 'gt_score': gt_score, + 'difficult': difficult + } + + self.ct += 1 + return {'image': im_path, ** im_info, ** label_info} + + @silent + def _parse_coco_files(self, im_dir, ann_path): + from pycocotools.coco import COCO + + coco = COCO(ann_path) + img_ids = coco.getImgIds() + img_ids.sort() + + samples = [] + for img_id in img_ids: + img_anno = coco.loadImgs([img_id])[0] + im_fname = img_anno['file_name'] + im_w = float(img_anno['width']) + im_h = float(img_anno['height']) + + im_path = osp.join(im_dir, im_fname) if im_dir else im_fname + + im_info = { + 'image': im_path, + 'im_id': np.array([img_id]), + 'image_shape': np.array( + [im_h, im_w], dtype=np.int32) + } + + ins_anno_ids = coco.getAnnIds(imgIds=[img_id], iscrowd=False) + instances = coco.loadAnns(ins_anno_ids) + + is_crowds = [] + gt_classes = [] + gt_bboxs = [] + gt_scores = [] + difficults = [] + + for inst in instances: + # Check gt bbox + if inst.get('ignore', False): + continue + if 'bbox' not in inst.keys(): + continue + else: + if not any(np.array(inst['bbox'])): + continue + + # Read box + x1, y1, box_w, box_h = inst['bbox'] + x2 = x1 + box_w + y2 = y1 + box_h + eps = 1e-5 + if inst['area'] > 0 and x2 - x1 > eps and y2 - y1 > eps: + inst['clean_bbox'] = [ + round(float(x), 3) for x in [x1, y1, x2, y2] + ] + + is_crowds.append([inst['iscrowd']]) + gt_classes.append([inst['category_id']]) + gt_bboxs.append(inst['clean_bbox']) + gt_scores.append([1.]) + difficults.append([0]) + + label_info = { + 'is_crowd': np.array(is_crowds), + 'gt_class': np.array(gt_classes), + 'gt_bbox': np.array(gt_bboxs).astype(np.float32), + 'gt_score': np.array(gt_scores).astype(np.float32), + 'difficult': np.array(difficults), + } + + samples.append({ ** im_info, ** label_info}) + + return samples + + +def build_input_from_file(file_list, prefix='', task='auto', label_list=None): + """ + Construct a list of dictionaries from file. Each dict in the list can be used as the input to `paddlers.transforms.Transform` objects. + + Args: + file_list (str): Path of file_list. + prefix (str, optional): A nonempty `prefix` specifies the directory that stores the images and annotation files. Default: ''. + task (str, optional): Supported values are 'seg', 'det', 'cd', 'clas', and 'auto'. When `task` is set to 'auto', automatically determine the task based on the input. + Default: 'auto'. + label_list (str | None, optional): Path of label_list. Default: None. + + Returns: + list: List of samples. + """ + + def _determine_task(parts): + if len(parts) in (3, 5): + task = 'cd' + elif len(parts) == 2: + if parts[1].isdigit(): + task = 'clas' + elif is_pic(osp.join(prefix, parts[1])): + task = 'seg' + else: + task = 'det' + else: + raise RuntimeError( + "Cannot automatically determine the task type. Please specify `task` manually." 
+ ) + return task + + if task not in ('seg', 'det', 'cd', 'clas', 'auto'): + raise ValueError("Invalid value of `task`") + + samples = [] + ctor = None + with open(file_list, 'r') as f: + for line in f: + line = line.strip() + parts = line.split() + if task == 'auto': + task = _determine_task(parts) + if ctor is None: + # Select and build sample constructor + ctor_class = globals()['Constr' + task.capitalize() + 'Sample'] + ctor = ctor_class(prefix, label_list) + sample = ctor(*parts) + if isinstance(sample, list): + samples.extend(sample) + else: + samples.append(sample) + + return samples diff --git a/tests/datasets/__init__.py b/tests/datasets/__init__.py new file mode 100644 index 00000000..29c8b7d1 --- /dev/null +++ b/tests/datasets/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/deploy/__init__.py b/tests/deploy/__init__.py new file mode 100644 index 00000000..cceea0e7 --- /dev/null +++ b/tests/deploy/__init__.py @@ -0,0 +1,15 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .test_predictor import * diff --git a/tests/deploy/test_predictor.py b/tests/deploy/test_predictor.py new file mode 100644 index 00000000..141556b6 --- /dev/null +++ b/tests/deploy/test_predictor.py @@ -0,0 +1,351 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os.path as osp +import tempfile +import unittest.mock as mock + +import paddle + +import paddlers as pdrs +from paddlers.transforms import decode_image +from testing_utils import CommonTest, run_script + +__all__ = [ + 'TestCDPredictor', 'TestClasPredictor', 'TestDetPredictor', + 'TestSegPredictor' +] + + +class TestPredictor(CommonTest): + MODULE = pdrs.tasks + TRAINER_NAME_TO_EXPORT_OPTS = {} + WHITE_LIST = [] + + @staticmethod + def add_tests(cls): + """ + Automatically patch testing functions to cls. 
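+
+ One test_<TrainerName> method is generated for each trainer in
+ cls.MODULE.__all__ (minus cls.WHITE_LIST). Each generated test saves a
+ randomly initialized dynamic-graph model, exports it via
+ deploy/export/export_model.py, and rebuilds a Predictor from the
+ exported static-graph model before comparing outputs.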
+ """ + + def _test_predictor(trainer_name): + def _test_predictor_impl(self): + trainer_class = getattr(self.MODULE, trainer_name) + # Construct trainer with default parameters + # TODO: Load pretrained weights to avoid numeric problems + trainer = trainer_class() + with tempfile.TemporaryDirectory() as td: + dynamic_model_dir = osp.join(td, "dynamic") + static_model_dir = osp.join(td, "static") + # HACK: BaseModel.save_model() requires BaseModel().optimizer to be set + optimizer = mock.Mock() + optimizer.state_dict.return_value = {'foo': 'bar'} + trainer.optimizer = optimizer + trainer.save_model(dynamic_model_dir) + export_cmd = f"python export_model.py --model_dir {dynamic_model_dir} --save_dir {static_model_dir} " + if trainer_name in self.TRAINER_NAME_TO_EXPORT_OPTS: + export_cmd += self.TRAINER_NAME_TO_EXPORT_OPTS[ + trainer_name] + elif '_default' in self.TRAINER_NAME_TO_EXPORT_OPTS: + export_cmd += self.TRAINER_NAME_TO_EXPORT_OPTS[ + '_default'] + run_script(export_cmd, wd="../deploy/export") + # Construct predictor + # TODO: Test trt and mkl + predictor = pdrs.deploy.Predictor( + static_model_dir, + use_gpu=paddle.device.get_device().startswith('gpu')) + self.check_predictor(predictor, trainer) + + return _test_predictor_impl + + for trainer_name in cls.MODULE.__all__: + if trainer_name in cls.WHITE_LIST: + continue + setattr(cls, 'test_' + trainer_name, _test_predictor(trainer_name)) + + return cls + + def check_predictor(self, predictor, trainer): + raise NotImplementedError + + def check_dict_equal( + self, + dict_, + expected_dict, + ignore_keys=('label_map', 'mask', 'category', 'category_id')): + # By default do not compare label_maps, masks, or categories, + # because numeric errors could result in large difference in labels. + if isinstance(dict_, list): + self.assertIsInstance(expected_dict, list) + self.assertEqual(len(dict_), len(expected_dict)) + for d1, d2 in zip(dict_, expected_dict): + self.check_dict_equal(d1, d2, ignore_keys=ignore_keys) + else: + assert isinstance(dict_, dict) + assert isinstance(expected_dict, dict) + self.assertEqual(dict_.keys(), expected_dict.keys()) + ignore_keys = set() if ignore_keys is None else set(ignore_keys) + for key in dict_.keys(): + if key in ignore_keys: + continue + # Use higher tolerance + self.check_output_equal( + dict_[key], expected_dict[key], rtol=1.e-4, atol=1.e-6) + + +@TestPredictor.add_tests +class TestCDPredictor(TestPredictor): + MODULE = pdrs.tasks.change_detector + TRAINER_NAME_TO_EXPORT_OPTS = { + '_default': "--fixed_input_shape [-1,3,256,256]" + } + # HACK: Skip CDNet. + # These models are heavily affected by numeric errors. 
+ WHITE_LIST = ['CDNet'] + + def check_predictor(self, predictor, trainer): + t1_path = "data/ssmt/optical_t1.bmp" + t2_path = "data/ssmt/optical_t2.bmp" + single_input = (t1_path, t2_path) + num_inputs = 2 + transforms = pdrs.transforms.Compose([pdrs.transforms.Normalize()]) + + # Expected failure + with self.assertRaises(ValueError): + predictor.predict(t1_path, transforms=transforms) + + # Single input (file paths) + input_ = single_input + out_single_file_p = predictor.predict(input_, transforms=transforms) + out_single_file_t = trainer.predict(input_, transforms=transforms) + self.check_dict_equal(out_single_file_p, out_single_file_t) + out_single_file_list_p = predictor.predict( + [input_], transforms=transforms) + self.assertEqual(len(out_single_file_list_p), 1) + self.check_dict_equal(out_single_file_list_p[0], out_single_file_p) + out_single_file_list_t = trainer.predict( + [input_], transforms=transforms) + self.check_dict_equal(out_single_file_list_p[0], + out_single_file_list_t[0]) + + # Single input (ndarrays) + input_ = (decode_image( + t1_path, to_rgb=False), decode_image( + t2_path, to_rgb=False)) # Reuse the name `input_` + out_single_array_p = predictor.predict(input_, transforms=transforms) + self.check_dict_equal(out_single_array_p, out_single_file_p) + out_single_array_t = trainer.predict(input_, transforms=transforms) + self.check_dict_equal(out_single_array_p, out_single_array_t) + out_single_array_list_p = predictor.predict( + [input_], transforms=transforms) + self.assertEqual(len(out_single_array_list_p), 1) + self.check_dict_equal(out_single_array_list_p[0], out_single_array_p) + out_single_array_list_t = trainer.predict( + [input_], transforms=transforms) + self.check_dict_equal(out_single_array_list_p[0], + out_single_array_list_t[0]) + + # Multiple inputs (file paths) + input_ = [single_input] * num_inputs # Reuse the name `input_` + out_multi_file_p = predictor.predict(input_, transforms=transforms) + self.assertEqual(len(out_multi_file_p), num_inputs) + out_multi_file_t = trainer.predict(input_, transforms=transforms) + self.assertEqual(len(out_multi_file_t), num_inputs) + + # Multiple inputs (ndarrays) + input_ = [(decode_image( + t1_path, to_rgb=False), decode_image( + t2_path, to_rgb=False))] * num_inputs # Reuse the name `input_` + out_multi_array_p = predictor.predict(input_, transforms=transforms) + self.assertEqual(len(out_multi_array_p), num_inputs) + out_multi_array_t = trainer.predict(input_, transforms=transforms) + self.assertEqual(len(out_multi_array_t), num_inputs) + + +@TestPredictor.add_tests +class TestClasPredictor(TestPredictor): + MODULE = pdrs.tasks.classifier + TRAINER_NAME_TO_EXPORT_OPTS = { + '_default': "--fixed_input_shape [-1,3,256,256]" + } + + def check_predictor(self, predictor, trainer): + single_input = "data/ssmt/optical_t1.bmp" + num_inputs = 2 + transforms = pdrs.transforms.Compose([pdrs.transforms.Normalize()]) + labels = list(range(2)) + trainer.labels = labels + predictor._model.labels = labels + + # Single input (file path) + input_ = single_input + out_single_file_p = predictor.predict(input_, transforms=transforms) + out_single_file_t = trainer.predict(input_, transforms=transforms) + self.check_dict_equal(out_single_file_p, out_single_file_t) + out_single_file_list_p = predictor.predict( + [input_], transforms=transforms) + self.assertEqual(len(out_single_file_list_p), 1) + self.check_dict_equal(out_single_file_list_p[0], out_single_file_p) + out_single_file_list_t = trainer.predict( + [input_], 
transforms=transforms) + self.check_dict_equal(out_single_file_list_p[0], + out_single_file_list_t[0]) + + # Single input (ndarray) + input_ = decode_image( + single_input, to_rgb=False) # Reuse the name `input_` + out_single_array_p = predictor.predict(input_, transforms=transforms) + self.check_dict_equal(out_single_array_p, out_single_file_p) + out_single_array_t = trainer.predict(input_, transforms=transforms) + self.check_dict_equal(out_single_array_p, out_single_array_t) + out_single_array_list_p = predictor.predict( + [input_], transforms=transforms) + self.assertEqual(len(out_single_array_list_p), 1) + self.check_dict_equal(out_single_array_list_p[0], out_single_array_p) + out_single_array_list_t = trainer.predict( + [input_], transforms=transforms) + self.check_dict_equal(out_single_array_list_p[0], + out_single_array_list_t[0]) + + # Multiple inputs (file paths) + input_ = [single_input] * num_inputs # Reuse the name `input_` + out_multi_file_p = predictor.predict(input_, transforms=transforms) + self.assertEqual(len(out_multi_file_p), num_inputs) + out_multi_file_t = trainer.predict(input_, transforms=transforms) + # Check value consistence + self.check_dict_equal(out_multi_file_p, out_multi_file_t) + + # Multiple inputs (ndarrays) + input_ = [decode_image( + single_input, to_rgb=False)] * num_inputs # Reuse the name `input_` + out_multi_array_p = predictor.predict(input_, transforms=transforms) + self.assertEqual(len(out_multi_array_p), num_inputs) + out_multi_array_t = trainer.predict(input_, transforms=transforms) + self.check_dict_equal(out_multi_array_p, out_multi_array_t) + + +@TestPredictor.add_tests +class TestDetPredictor(TestPredictor): + MODULE = pdrs.tasks.object_detector + TRAINER_NAME_TO_EXPORT_OPTS = { + '_default': "--fixed_input_shape [-1,3,256,256]" + } + + def check_predictor(self, predictor, trainer): + # For detection tasks, do NOT ensure the consistence of bboxes. + # This is because the coordinates of bboxes were observed to be very sensitive to numeric errors, + # given that the network is (partially?) randomly initialized. 
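+ # Consequently, only the number of returned predictions is asserted
+ # below, not the predicted boxes themselves.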
+ single_input = "data/ssmt/optical_t1.bmp" + num_inputs = 2 + transforms = pdrs.transforms.Compose([pdrs.transforms.Normalize()]) + labels = list(range(80)) + trainer.labels = labels + predictor._model.labels = labels + + # Single input (file path) + input_ = single_input + predictor.predict(input_, transforms=transforms) + trainer.predict(input_, transforms=transforms) + out_single_file_list_p = predictor.predict( + [input_], transforms=transforms) + self.assertEqual(len(out_single_file_list_p), 1) + out_single_file_list_t = trainer.predict( + [input_], transforms=transforms) + self.assertEqual(len(out_single_file_list_t), 1) + + # Single input (ndarray) + input_ = decode_image( + single_input, to_rgb=False) # Reuse the name `input_` + predictor.predict(input_, transforms=transforms) + trainer.predict(input_, transforms=transforms) + out_single_array_list_p = predictor.predict( + [input_], transforms=transforms) + self.assertEqual(len(out_single_array_list_p), 1) + out_single_array_list_t = trainer.predict( + [input_], transforms=transforms) + self.assertEqual(len(out_single_array_list_t), 1) + + # Multiple inputs (file paths) + input_ = [single_input] * num_inputs # Reuse the name `input_` + out_multi_file_p = predictor.predict(input_, transforms=transforms) + self.assertEqual(len(out_multi_file_p), num_inputs) + out_multi_file_t = trainer.predict(input_, transforms=transforms) + self.assertEqual(len(out_multi_file_t), num_inputs) + + # Multiple inputs (ndarrays) + input_ = [decode_image( + single_input, to_rgb=False)] * num_inputs # Reuse the name `input_` + out_multi_array_p = predictor.predict(input_, transforms=transforms) + self.assertEqual(len(out_multi_array_p), num_inputs) + out_multi_array_t = trainer.predict(input_, transforms=transforms) + self.assertEqual(len(out_multi_array_t), num_inputs) + + +@TestPredictor.add_tests +class TestSegPredictor(TestPredictor): + MODULE = pdrs.tasks.segmenter + TRAINER_NAME_TO_EXPORT_OPTS = { + '_default': "--fixed_input_shape [-1,3,256,256]" + } + + def check_predictor(self, predictor, trainer): + single_input = "data/ssmt/optical_t1.bmp" + num_inputs = 2 + transforms = pdrs.transforms.Compose([pdrs.transforms.Normalize()]) + + # Single input (file path) + input_ = single_input + out_single_file_p = predictor.predict(input_, transforms=transforms) + out_single_file_t = trainer.predict(input_, transforms=transforms) + self.check_dict_equal(out_single_file_p, out_single_file_t) + out_single_file_list_p = predictor.predict( + [input_], transforms=transforms) + self.assertEqual(len(out_single_file_list_p), 1) + self.check_dict_equal(out_single_file_list_p[0], out_single_file_p) + out_single_file_list_t = trainer.predict( + [input_], transforms=transforms) + self.check_dict_equal(out_single_file_list_p[0], + out_single_file_list_t[0]) + + # Single input (ndarray) + input_ = decode_image( + single_input, to_rgb=False) # Reuse the name `input_` + out_single_array_p = predictor.predict(input_, transforms=transforms) + self.check_dict_equal(out_single_array_p, out_single_file_p) + out_single_array_t = trainer.predict(input_, transforms=transforms) + self.check_dict_equal(out_single_array_p, out_single_array_t) + out_single_array_list_p = predictor.predict( + [input_], transforms=transforms) + self.assertEqual(len(out_single_array_list_p), 1) + self.check_dict_equal(out_single_array_list_p[0], out_single_array_p) + out_single_array_list_t = trainer.predict( + [input_], transforms=transforms) + self.check_dict_equal(out_single_array_list_p[0], + 
out_single_array_list_t[0]) + + # Multiple inputs (file paths) + input_ = [single_input] * num_inputs # Reuse the name `input_` + out_multi_file_p = predictor.predict(input_, transforms=transforms) + self.assertEqual(len(out_multi_file_p), num_inputs) + out_multi_file_t = trainer.predict(input_, transforms=transforms) + self.assertEqual(len(out_multi_file_t), num_inputs) + + # Multiple inputs (ndarrays) + input_ = [decode_image( + single_input, to_rgb=False)] * num_inputs # Reuse the name `input_` + out_multi_array_p = predictor.predict(input_, transforms=transforms) + self.assertEqual(len(out_multi_array_p), num_inputs) + out_multi_array_t = trainer.predict(input_, transforms=transforms) + self.assertEqual(len(out_multi_array_t), num_inputs) diff --git a/tests/download_test_data.sh b/tests/download_test_data.sh new file mode 100644 index 00000000..f672acd3 --- /dev/null +++ b/tests/download_test_data.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash + +function remove_dir_if_exist() { + local dir="$1" + if [ -d "${dir}" ]; then + rm -rf "${dir}" + echo "\033[0;31mDirectory ${dir} has been removed.\033[0m" + fi +} + +## Remove old directories (if they exist) +remove_dir_if_exist 'data/ssst' +remove_dir_if_exist 'data/ssmt' + +## Download and unzip +curl -kL https://paddlers.bj.bcebos.com/tests/data/ssst.tar.gz -o data/ssst.tar.gz +tar -zxf data/ssst.tar.gz -C data/ + +curl -kL https://paddlers.bj.bcebos.com/tests/data/ssmt.tar.gz -o data/ssmt.tar.gz +tar -zxf data/ssmt.tar.gz -C data/ diff --git a/tests/fast_tests.py b/tests/fast_tests.py new file mode 100644 index 00000000..8e7c26ed --- /dev/null +++ b/tests/fast_tests.py @@ -0,0 +1,16 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from rs_models import * +from transforms import * diff --git a/tests/rs_models/__init__.py b/tests/rs_models/__init__.py new file mode 100644 index 00000000..1206c558 --- /dev/null +++ b/tests/rs_models/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .test_cd_models import * +from .test_clas_models import * +from .test_det_models import * +from .test_seg_models import * diff --git a/tests/rs_models/test_cd_models.py b/tests/rs_models/test_cd_models.py new file mode 100644 index 00000000..9c39fed5 --- /dev/null +++ b/tests/rs_models/test_cd_models.py @@ -0,0 +1,221 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import platform +from itertools import cycle + +import paddlers +from rs_models.test_model import TestModel + +__all__ = [ + 'TestBITModel', 'TestCDNetModel', 'TestChangeStarModel', 'TestDSAMNetModel', + 'TestDSIFNModel', 'TestFCEarlyFusionModel', 'TestFCSiamConcModel', + 'TestFCSiamDiffModel', 'TestSNUNetModel', 'TestSTANetModel' +] + + +class TestCDModel(TestModel): + EF_MODE = 'None' # Early-fusion strategy + + def check_output(self, output, target): + self.assertIsInstance(output, list) + self.check_output_equal(len(output), len(target)) + for o, t in zip(output, target): + o = o.numpy() + self.check_output_equal(o.shape[0], t.shape[0]) + self.check_output_equal(len(o.shape), 4) + self.check_output_equal(o.shape[2:], t.shape[2:]) + + def set_inputs(self): + if self.EF_MODE == 'Concat': + # Early-fusion + def _gen_data(specs): + for spec in specs: + c = spec['in_channels'] // 2 + assert c * 2 == spec['in_channels'] + yield [self.get_randn_tensor(c), self.get_randn_tensor(c)] + elif self.EF_MODE == 'None': + # Late-fusion + def _gen_data(specs): + for spec in specs: + c = spec.get('in_channels', 3) + yield [self.get_randn_tensor(c), self.get_randn_tensor(c)] + else: + raise ValueError + self.inputs = _gen_data(self.specs) + + def set_targets(self): + def _gen_data(specs): + for spec in specs: + c = spec.get('num_classes', 2) + yield [self.get_zeros_array(c)] + + self.targets = _gen_data(self.specs) + + +class TestBITModel(TestCDModel): + MODEL_CLASS = paddlers.custom_models.cd.BIT + + def set_specs(self): + base_spec = dict(in_channels=3, num_classes=2) + self.specs = [ + base_spec, + dict(**base_spec, backbone='resnet34'), + dict(**base_spec, n_stages=3), + dict(**base_spec, enc_depth=4, dec_head_dim=16), + dict(in_channels=4, num_classes=2), + dict(in_channels=3, num_classes=8) + ] # yapf: disable + + +class TestCDNetModel(TestCDModel): + MODEL_CLASS = paddlers.custom_models.cd.CDNet + EF_MODE = 'Concat' + + def set_specs(self): + self.specs = [ + dict(in_channels=6, num_classes=2), + dict(in_channels=8, num_classes=2), + dict(in_channels=6, num_classes=8) + ] # yapf: disable + + +class TestChangeStarModel(TestCDModel): + MODEL_CLASS = paddlers.custom_models.cd.ChangeStar + + def set_specs(self): + self.specs = [ + dict(num_classes=2), dict(num_classes=10), + dict(num_classes=2, mid_channels=128, num_convs=2), + dict(num_classes=2, _phase='eval', _stop_grad=True) + ] # yapf: disable + + def set_targets(self): + # Avoid allocation of large memories + tar_c2 = [self.get_zeros_array(2)] * 4 + self.targets = [ + tar_c2, + [self.get_zeros_array(10)] * 2 + [self.get_zeros_array(2)] * 2, + tar_c2, [self.get_zeros_array(2)] + ] + + +class TestDSAMNetModel(TestCDModel): + MODEL_CLASS = paddlers.custom_models.cd.DSAMNet + + def set_specs(self): + base_spec = dict(in_channels=3, num_classes=2) + self.specs = [ + base_spec, + dict(in_channels=8, num_classes=2), + dict(in_channels=3, num_classes=8), + dict(**base_spec, ca_ratio=4, sa_kernel=5), + 
dict(**base_spec, _phase='eval', _stop_grad=True) + ] # yapf: disable + + def set_targets(self): + # Avoid allocation of large memories + tar_c2 = [self.get_zeros_array(2)] * 3 + self.targets = [ + tar_c2, tar_c2, [self.get_zeros_array(8)] * 3, tar_c2, + [self.get_zeros_array(2)] + ] + + +class TestDSIFNModel(TestCDModel): + MODEL_CLASS = paddlers.custom_models.cd.DSIFN + + def set_specs(self): + self.specs = [ + dict(num_classes=2), dict(num_classes=10), + dict(num_classes=2, use_dropout=True), + dict(num_classes=2, _phase='eval', _stop_grad=True) + ] # yapf: disable + + def set_targets(self): + # Avoid allocation of large memories + tar_c2 = [self.get_zeros_array(2)] * 5 + self.targets = [ + tar_c2, [self.get_zeros_array(10)] * 5, tar_c2, + [self.get_zeros_array(2)] + ] + + +class TestFCEarlyFusionModel(TestCDModel): + MODEL_CLASS = paddlers.custom_models.cd.FCEarlyFusion + EF_MODE = 'Concat' + + def set_specs(self): + self.specs = [ + dict(in_channels=6, num_classes=2), + dict(in_channels=8, num_classes=2), + dict(in_channels=6, num_classes=8), + dict(in_channels=6, num_classes=2, use_dropout=True) + ] # yapf: disable + + +class TestFCSiamConcModel(TestCDModel): + MODEL_CLASS = paddlers.custom_models.cd.FCSiamConc + + def set_specs(self): + self.specs = [ + dict(in_channels=3, num_classes=2), + dict(in_channels=8, num_classes=2), + dict(in_channels=3, num_classes=8), + dict(in_channels=3, num_classes=2, use_dropout=True) + ] # yapf: disable + + +class TestFCSiamDiffModel(TestCDModel): + MODEL_CLASS = paddlers.custom_models.cd.FCSiamDiff + + def set_specs(self): + self.specs = [ + dict(in_channels=3, num_classes=2), + dict(in_channels=8, num_classes=2), + dict(in_channels=3, num_classes=8), + dict(in_channels=3, num_classes=2, use_dropout=True) + ] # yapf: disable + + +class TestSNUNetModel(TestCDModel): + MODEL_CLASS = paddlers.custom_models.cd.SNUNet + + def set_specs(self): + self.specs = [ + dict(in_channels=3, num_classes=2), + dict(in_channels=8, num_classes=2), + dict(in_channels=3, num_classes=8), + dict(in_channels=3, num_classes=2, width=64) + ] # yapf: disable + + +class TestSTANetModel(TestCDModel): + MODEL_CLASS = paddlers.custom_models.cd.STANet + + def set_specs(self): + base_spec = dict(in_channels=3, num_classes=2) + self.specs = [ + base_spec, + dict(in_channels=8, num_classes=2), + dict(in_channels=3, num_classes=8), + dict(**base_spec, att_type='PAM'), + dict(**base_spec, ds_factor=4) + ] # yapf: disable + + +# HACK:FIXME: We observe an OOM error when running TestSTANetModel.test_forward() on a Windows machine. +# Currently, we do not perform this test. +if platform.system() == 'Windows': + TestSTANetModel.test_forward = lambda self: None diff --git a/tests/rs_models/test_clas_models.py b/tests/rs_models/test_clas_models.py new file mode 100644 index 00000000..ab184fa3 --- /dev/null +++ b/tests/rs_models/test_clas_models.py @@ -0,0 +1,38 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
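+
+ # For reference, covering one more change detection model only needs a
+ # small subclass of TestCDModel above (the model name here is hypothetical):
+ #
+ #   class TestMyCDModel(TestCDModel):
+ #       MODEL_CLASS = paddlers.custom_models.cd.MyCDNet  # hypothetical
+ #
+ #       def set_specs(self):
+ #           self.specs = [dict(in_channels=3, num_classes=2)]
+ #
+ # set_inputs() and set_targets() are inherited and derive tensor shapes
+ # from the specs.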
+
+ import paddlers
+ from rs_models.test_model import TestModel
+
+ __all__ = []
+
+
+ class TestClasModel(TestModel):
+ DEFAULT_HW = (224, 224)
+
+ def check_output(self, output, target):
+ # target is the expected shape of the output
+ self.check_output_equal(output.numpy().shape, target)
+
+ def set_inputs(self):
+ def _gen_data(specs):
+ for spec in specs:
+ c = spec.get('in_channels', 3)
+ yield [self.get_randn_tensor(c)]
+
+ self.inputs = _gen_data(self.specs)
+
+ def set_targets(self):
+ self.targets = [[self.DEFAULT_BATCH_SIZE, spec.get('num_classes', 2)]
+ for spec in self.specs]
diff --git a/tests/rs_models/test_det_models.py b/tests/rs_models/test_det_models.py
new file mode 100644
index 00000000..5aed6ef9
--- /dev/null
+++ b/tests/rs_models/test_det_models.py
@@ -0,0 +1,34 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from itertools import cycle
+
+from rs_models.test_model import TestModel
+
+__all__ = []
+
+
+class TestDetModel(TestModel):
+ DEFAULT_HW = (608, 608)
+
+ def check_output(self, output, target):
+ self.assertIsInstance(output, dict)
+ self.assertIn('bbox', output)
+ self.assertIn('bbox_num', output)
+ if 'mask' in output:
+ self.assertIsInstance(output['mask'], list)
+
+ def set_inputs(self):
+ self.inputs = cycle([self.get_randn_tensor(3)])
diff --git a/tests/rs_models/test_model.py b/tests/rs_models/test_model.py
new file mode 100644
index 00000000..06c47770
--- /dev/null
+++ b/tests/rs_models/test_model.py
@@ -0,0 +1,120 @@
+# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
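+
+ # Note: the detector output validated above is a dict with 'bbox' and
+ # 'bbox_num' entries (each row of 'bbox' typically being
+ # [class_id, score, x1, y1, x2, y2] in PaddleDetection-style layout);
+ # a 'mask' list appears only for instance segmentation models.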
+ +import inspect + +import paddle +import numpy as np +from paddle.static import InputSpec + +from testing_utils import CommonTest + + +class _TestModelNamespace: + class TestModel(CommonTest): + MODEL_CLASS = None + DEFAULT_HW = (256, 256) + DEFAULT_BATCH_SIZE = 2 + + def setUp(self): + self.set_specs() + self.set_inputs() + self.set_targets() + self.set_models() + + def test_forward(self): + for i, ( + input, model, target + ) in enumerate(zip(self.inputs, self.models, self.targets)): + with self.subTest(i=i): + if isinstance(input, list): + output = model(*input) + else: + output = model(input) + self.check_output(output, target) + + def test_to_static(self): + for i, ( + input, model, target + ) in enumerate(zip(self.inputs, self.models, self.targets)): + with self.subTest(i=i): + static_model = paddle.jit.to_static( + model, input_spec=self.get_input_spec(model, input)) + + def check_output(self, output, target): + pass + + def set_specs(self): + self.specs = [] + + def set_models(self): + self.models = (self.build_model(spec) for spec in self.specs) + + def set_inputs(self): + self.inputs = [] + + def set_targets(self): + self.targets = [] + + def build_model(self, spec): + if '_phase' in spec: + phase = spec.pop('_phase') + else: + phase = 'train' + if '_stop_grad' in spec: + stop_grad = spec.pop('_stop_grad') + else: + stop_grad = False + + model = self.MODEL_CLASS(**spec) + + if phase == 'train': + model.train() + elif phase == 'eval': + model.eval() + if stop_grad: + for p in model.parameters(): + p.stop_gradient = True + + return model + + def get_shape(self, c, b=None, h=None, w=None): + if h is None or w is None: + h, w = self.DEFAULT_HW + if b is None: + b = self.DEFAULT_BATCH_SIZE + return (b, c, h, w) + + def get_zeros_array(self, c, b=None, h=None, w=None): + shape = self.get_shape(c, b, h, w) + return np.zeros(shape) + + def get_randn_tensor(self, c, b=None, h=None, w=None): + shape = self.get_shape(c, b, h, w) + return paddle.randn(shape) + + def get_input_spec(self, model, input): + if not isinstance(input, list): + input = [input] + input_spec = [] + for param_name, tensor in zip( + inspect.signature(model.forward).parameters, input): + # XXX: Hard-code dtype + input_spec.append( + InputSpec( + shape=tensor.shape, name=param_name, dtype='float32')) + return input_spec + + +TestModel = _TestModelNamespace.TestModel diff --git a/tests/rs_models/test_seg_models.py b/tests/rs_models/test_seg_models.py new file mode 100644 index 00000000..e3c79f5c --- /dev/null +++ b/tests/rs_models/test_seg_models.py @@ -0,0 +1,56 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
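+
+ # The TestModel harness above drives each case from four parallel
+ # sequences: specs (constructor kwargs, where the special keys '_phase'
+ # and '_stop_grad' are consumed by build_model()), models, inputs, and
+ # targets. Subclasses only override set_specs()/set_inputs()/set_targets()
+ # and check_output().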
+ +import paddlers +from rs_models.test_model import TestModel + +__all__ = ['TestFarSegModel'] + + +class TestSegModel(TestModel): + DEFAULT_HW = (512, 512) + + def check_output(self, output, target): + self.assertIsInstance(output, list) + self.check_output_equal(len(output), len(target)) + for o, t in zip(output, target): + o = o.numpy() + self.check_output_equal(o.shape[0], t.shape[0]) + self.check_output_equal(len(o.shape), 4) + self.check_output_equal(o.shape[2:], t.shape[2:]) + + def set_inputs(self): + def _gen_data(specs): + for spec in specs: + c = spec.get('in_channels', 3) + yield self.get_randn_tensor(c) + + self.inputs = _gen_data(self.specs) + + def set_targets(self): + def _gen_data(specs): + for spec in specs: + c = spec.get('num_classes', 2) + yield [self.get_zeros_array(c)] + + self.targets = _gen_data(self.specs) + + +class TestFarSegModel(TestSegModel): + MODEL_CLASS = paddlers.custom_models.seg.FarSeg + + def set_specs(self): + self.specs = [ + dict(), dict(num_classes=20), dict(encoder_pretrained=False) + ] diff --git a/tests/run_fast_tests.sh b/tests/run_fast_tests.sh new file mode 100644 index 00000000..fac43985 --- /dev/null +++ b/tests/run_fast_tests.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +bash download_test_data.sh +python -m unittest -v fast_tests \ No newline at end of file diff --git a/tests/run_tests.sh b/tests/run_tests.sh new file mode 100644 index 00000000..20c59860 --- /dev/null +++ b/tests/run_tests.sh @@ -0,0 +1,4 @@ +#!/usr/bin/env bash + +bash download_test_data.sh +python -m unittest discover -v \ No newline at end of file diff --git a/tests/tasks/__init__.py b/tests/tasks/__init__.py new file mode 100644 index 00000000..29c8b7d1 --- /dev/null +++ b/tests/tasks/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/test_examples.py b/tests/test_examples.py new file mode 100644 index 00000000..29c8b7d1 --- /dev/null +++ b/tests/test_examples.py @@ -0,0 +1,13 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/tests/test_tutorials.py b/tests/test_tutorials.py new file mode 100644 index 00000000..48535274 --- /dev/null +++ b/tests/test_tutorials.py @@ -0,0 +1,89 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os.path as osp +import re +import tempfile +import shutil +from glob import iglob + +from testing_utils import run_script, CpuCommonTest + + +class TestTutorial(CpuCommonTest): + SUBDIR = "./" + TIMEOUT = 300 + REGEX = ".*" + + @classmethod + def setUpClass(cls): + cls._td = tempfile.TemporaryDirectory(dir='./') + # Recursively copy the content of `cls.SUBDIR` to td. + # This is necessary for running scripts in td. + cls._TSUBDIR = osp.join(cls._td.name, osp.basename(cls.SUBDIR)) + shutil.copytree(cls.SUBDIR, cls._TSUBDIR) + return super().setUpClass() + + @classmethod + def tearDownClass(cls): + cls._td.cleanup() + + @staticmethod + def add_tests(cls): + """ + Automatically patch testing functions to cls. + """ + + def _test_tutorial(script_name): + def _test_tutorial_impl(self): + # Set working directory to `cls._TSUBDIR` such that the + # files generated by the script will be automatically cleaned. + run_script(f"python {script_name}", wd=cls._TSUBDIR) + + return _test_tutorial_impl + + for script_path in filter( + re.compile(cls.REGEX).match, + iglob(osp.join(cls.SUBDIR, '*.py'))): + script_name = osp.basename(script_path) + if osp.normpath(osp.join(cls.SUBDIR, script_name)) != osp.normpath( + script_path): + raise ValueError( + f"{script_name} should be directly contained in {cls.SUBDIR}" + ) + setattr(cls, 'test_' + osp.splitext(script_name)[0], + _test_tutorial(script_name)) + + return cls + + +@TestTutorial.add_tests +class TestCDTutorial(TestTutorial): + SUBDIR = "../tutorials/train/change_detection" + + +@TestTutorial.add_tests +class TestClasTutorial(TestTutorial): + SUBDIR = "../tutorials/train/classification" + + +@TestTutorial.add_tests +class TestDetTutorial(TestTutorial): + SUBDIR = "../tutorials/train/object_detection" + + +@TestTutorial.add_tests +class TestSegTutorial(TestTutorial): + SUBDIR = "../tutorials/train/semantic_segmentation" + REGEX = r".*(? 0, msg) + + return wrapper + + def _test_places(func): + """ + Setting the running place for each test. + """ + + def wrapper(self, *args, **kwargs): + places = self.places + for place in places: + paddle.set_device(place) + func(self, *args, **kwargs) + + return wrapper + + def _check_output_impl(self, + result, + expected_result, + rtol, + atol, + equal=True): + assertForNormalType = self.assertNotEqual + assertForFloat = self.assertFalse + if equal: + assertForNormalType = self.assertEqual + assertForFloat = self.assertTrue + + result_t = type(result) + error_msg = 'Output has diff at place:{}. 
\nExpect: {} \nBut Got: {} in class {}' + if result_t in [list, tuple]: + result_t = get_container_type(result) + if result_t in [ + str, int, bool, set, np.bool, np.int32, np.int64, np.str + ]: + assertForNormalType( + result, + expected_result, + msg=error_msg.format(paddle.get_device(), expected_result, + result, self.__class__.__name__)) + elif result_t in [float, np.ndarray, np.float32, np.float64]: + assertForFloat( + np.allclose( + result, expected_result, rtol=rtol, atol=atol), + msg=error_msg.format(paddle.get_device(), expected_result, + result, self.__class__.__name__)) + if result_t == np.ndarray: + assertForNormalType( + result.shape, + expected_result.shape, + msg=error_msg.format( + paddle.get_device(), expected_result.shape, + result.shape, self.__class__.__name__)) + else: + raise ValueError( + 'result type must be str, int, bool, set, np.bool, np.int32, ' + 'np.int64, np.str, float, np.ndarray, np.float32, np.float64' + ) + + def check_output_equal(self, + result, + expected_result, + rtol=1.e-5, + atol=1.e-8): + """ + Check whether result and expected result are equal, including shape. + + Args: + result: str, int, bool, set, np.ndarray. + The result needs to be checked. + expected_result: str, int, bool, set, np.ndarray. The type has to be same as result's. + Use the expected result to check result. + rtol: float + relative tolerance, default 1.e-5. + atol: float + absolute tolerance, default 1.e-8 + """ + + self._check_output_impl(result, expected_result, rtol, atol) + + def check_output_not_equal(self, + result, + expected_result, + rtol=1.e-5, + atol=1.e-8): + """ + Check whether result and expected result are not equal, including shape. + + Args: + result: str, int, bool, set, np.ndarray. + The result needs to be checked. + expected_result: str, int, bool, set, np.ndarray. The type has to be same as result's. + Use the expected result to check result. + rtol: float + relative tolerance, default 1.e-5. + atol: float + absolute tolerance, default 1.e-8 + """ + + self._check_output_impl( + result, expected_result, rtol, atol, equal=False) + + class CpuCommonTest(CommonTest): + def __init__(self, methodName='runTest'): + super(CpuCommonTest, self).__init__(methodName=methodName) + self.places = ['cpu'] + + +CommonTest = _CommonTestNamespace.CommonTest +CpuCommonTest = _CommonTestNamespace.CpuCommonTest diff --git a/tests/tools/__init__.py b/tests/tools/__init__.py new file mode 100644 index 00000000..e1517209 --- /dev/null +++ b/tests/tools/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .test_match import TestMatch +from .test_oif import TestOIF +from .test_pca import TestPCA +from .test_split import TestSplit diff --git a/tests/tools/test_match.py b/tests/tools/test_match.py new file mode 100644 index 00000000..a09496d0 --- /dev/null +++ b/tests/tools/test_match.py @@ -0,0 +1,25 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. 
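+#
+# (For the tool tests in this directory: testing_utils.check_output_equal()
+# compares str/int/bool/set results with assertEqual, and float/ndarray
+# results with np.allclose plus a shape check.)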
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import tempfile + +from testing_utils import CpuCommonTest, run_script + + +class TestMatch(CpuCommonTest): + def test_script(self): + with tempfile.TemporaryDirectory() as td: + run_script( + f"python match.py --im1_path ../tests/data/ssmt/multispectral_t1.tif --im2_path ../tests/data/ssmt/multispectral_t1.tif --save_path {td}/out.tiff", + wd="../tools") diff --git a/tests/tools/test_oif.py b/tests/tools/test_oif.py new file mode 100644 index 00000000..0f8f5c04 --- /dev/null +++ b/tests/tools/test_oif.py @@ -0,0 +1,24 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import tempfile + +from testing_utils import CpuCommonTest, run_script + + +class TestOIF(CpuCommonTest): + def test_script(self): + run_script( + f"python oif.py --im_path ../tests/data/ssst/multispectral.tif", + wd="../tools") diff --git a/tests/tools/test_pca.py b/tests/tools/test_pca.py new file mode 100644 index 00000000..96efb0f3 --- /dev/null +++ b/tests/tools/test_pca.py @@ -0,0 +1,25 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import tempfile + +from testing_utils import CpuCommonTest, run_script + + +class TestPCA(CpuCommonTest): + def test_script(self): + with tempfile.TemporaryDirectory() as td: + run_script( + f"python pca.py --im_path ../tests/data/ssst/multispectral.tif --save_dir {td} --dim 5", + wd="../tools") diff --git a/tests/tools/test_split.py b/tests/tools/test_split.py new file mode 100644 index 00000000..263b850e --- /dev/null +++ b/tests/tools/test_split.py @@ -0,0 +1,25 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import tempfile + +from testing_utils import CpuCommonTest, run_script + + +class TestSplit(CpuCommonTest): + def test_script(self): + with tempfile.TemporaryDirectory() as td: + run_script( + f"python split.py --image_path ../tests/data/ssst/multispectral.tif --mask_path ../tests/data/ssst/multiclass_gt2.png --block_size 128 --save_dir {td}", + wd="../tools") diff --git a/tests/transforms/__init__.py b/tests/transforms/__init__.py new file mode 100644 index 00000000..21d1c8f9 --- /dev/null +++ b/tests/transforms/__init__.py @@ -0,0 +1,16 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .test_functions import * +from .test_operators import * diff --git a/tests/transforms/test_functions.py b/tests/transforms/test_functions.py new file mode 100644 index 00000000..f51152ed --- /dev/null +++ b/tests/transforms/test_functions.py @@ -0,0 +1,61 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
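The four tool tests above all follow the same pattern: create a temporary output directory, then drive the corresponding CLI script through run_script with wd pointing at ../tools. run_script itself is defined in tests/testing_utils.py and is not shown in this hunk; a plausible minimal equivalent (an assumption, not the actual implementation) might look like:

import subprocess

def run_script(cmd, wd=None):
    # Run `cmd` through the shell with `wd` as the working directory,
    # raising CalledProcessError on a nonzero exit status.
    return subprocess.run(cmd, shell=True, check=True, cwd=wd)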
+ +import copy + +import paddlers.transforms as T +from testing_utils import CpuCommonTest +from data import build_input_from_file + +__all__ = ['TestMatchHistograms', 'TestMatchByRegression'] + + +class TestMatchHistograms(CpuCommonTest): + def setUp(self): + self.inputs = [ + build_input_from_file( + "data/ssmt/test_mixed_binary.txt", prefix="./data/ssmt") + ] + + def test_output_shape(self): + decoder = T.DecodeImg() + for input in copy.deepcopy(self.inputs): + for sample in input: + sample = decoder(sample) + im_out = T.functions.match_histograms(sample['image'], + sample['image2']) + self.check_output_equal(im_out.shape, sample['image2'].shape) + im_out = T.functions.match_histograms(sample['image2'], + sample['image']) + self.check_output_equal(im_out.shape, sample['image'].shape) + + +class TestMatchByRegression(CpuCommonTest): + def setUp(self): + self.inputs = [ + build_input_from_file( + "data/ssmt/test_mixed_binary.txt", prefix="./data/ssmt") + ] + + def test_output_shape(self): + decoder = T.DecodeImg() + for input in copy.deepcopy(self.inputs): + for sample in input: + sample = decoder(sample) + im_out = T.functions.match_by_regression(sample['image'], + sample['image2']) + self.check_output_equal(im_out.shape, sample['image2'].shape) + im_out = T.functions.match_by_regression(sample['image2'], + sample['image']) + self.check_output_equal(im_out.shape, sample['image'].shape) diff --git a/tests/transforms/test_operators.py b/tests/transforms/test_operators.py new file mode 100644 index 00000000..f6320ae6 --- /dev/null +++ b/tests/transforms/test_operators.py @@ -0,0 +1,313 @@ +# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import inspect +import copy + +import numpy as np + +import paddlers.transforms as T +from testing_utils import CpuCommonTest +from data import build_input_from_file + +__all__ = ['TestTransform', 'TestCompose', 'TestArrange'] + +WHITE_LIST = [] + + +def _add_op_tests(cls): + """ + Automatically patch testing functions for transform operators. + """ + + for op_name in T.operators.__all__: + op_class = getattr(T.operators, op_name) + if isinstance(op_class, type) and issubclass(op_class, + T.operators.Transform): + if op_class is T.DecodeImg or op_class in WHITE_LIST or op_name in WHITE_LIST: + continue + attr_name = 'test_' + op_name + if hasattr(cls, attr_name): + continue + # If the operator cannot be initialized with default parameters, skip it. 
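# A note on the for/else construct right below: Python runs a loop's `else`
# clause only when the loop finishes without hitting `break`. Here the loop
# breaks on the first __init__ parameter (other than `self`) that lacks a
# default, so a test is generated only for operators that can be constructed
# with no arguments. A standalone equivalent of the same check:
#
#     def constructible_with_defaults(op_class):
#         return all(p.default is not p.empty
#                    for name, p in inspect.signature(
#                        op_class.__init__).parameters.items()
#                    if name != 'self')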
+ for key, param in inspect.signature( + op_class.__init__).parameters.items(): + if key == 'self': + continue + if param.default is param.empty: + break + else: + filter_ = OP2FILTER.get(op_name, None) + setattr( + cls, attr_name, make_test_func( + op_class, filter_=filter_)) + return cls + + +def make_test_func(op_class, + *args, + in_hook=None, + out_hook=None, + filter_=None, + **kwargs): + def _test_func(self): + op = op_class(*args, **kwargs) + decoder = T.DecodeImg() + inputs = map(decoder, copy.deepcopy(self.inputs)) + for i, input_ in enumerate(inputs): + if filter_ is not None: + input_ = filter_(input_) + with self.subTest(i=i): + for sample in input_: + if in_hook: + sample = in_hook(sample) + sample = op(sample) + if out_hook: + sample = out_hook(sample) + + return _test_func + + +class _InputFilter(object): + def __init__(self, conds): + self.conds = conds + + def __call__(self, samples): + for sample in samples: + for cond in self.conds: + if cond(sample): + yield sample + + def __or__(self, filter): + return _InputFilter(self.conds + filter.conds) + + def __and__(self, filter): + return _InputFilter( + [cond for cond in self.conds if cond in filter.conds]) + + def get_sample(self, input): + return input[0] + + +def _is_optical(sample): + return sample['image'].shape[2] == 3 + + +def _is_sar(sample): + return sample['image'].shape[2] == 1 + + +def _is_multispectral(sample): + return sample['image'].shape[2] > 3 + + +def _is_mt(sample): + return 'image2' in sample + + +def _is_seg(sample): + return 'mask' in sample and 'image2' not in sample + + +def _is_det(sample): + return 'gt_bbox' in sample or 'gt_poly' in sample + + +def _is_clas(sample): + return 'label' in sample + + +_filter_only_optical = _InputFilter([_is_optical]) +_filter_only_sar = _InputFilter([_is_sar]) +_filter_only_multispectral = _InputFilter([_is_multispectral]) +_filter_no_multispectral = _filter_only_optical | _filter_only_sar +_filter_no_sar = _filter_only_optical | _filter_only_multispectral +_filter_no_optical = _filter_only_sar | _filter_only_multispectral +_filter_only_mt = _InputFilter([_is_mt]) +_filter_no_det = _InputFilter([_is_seg, _is_clas, _is_mt]) + +OP2FILTER = { + 'RandomSwap': _filter_only_mt, + 'SelectBand': _filter_no_sar, + 'Dehaze': _filter_only_optical, + 'Normalize': _filter_only_optical, + 'RandomDistort': _filter_only_optical +} + + +@_add_op_tests +class TestTransform(CpuCommonTest): + def setUp(self): + self.inputs = [ + build_input_from_file( + "data/ssst/test_optical_clas.txt", + prefix="./data/ssst"), + build_input_from_file( + "data/ssst/test_sar_clas.txt", + prefix="./data/ssst"), + build_input_from_file( + "data/ssst/test_multispectral_clas.txt", + prefix="./data/ssst"), + build_input_from_file( + "data/ssst/test_optical_seg.txt", + prefix="./data/ssst"), + build_input_from_file( + "data/ssst/test_sar_seg.txt", + prefix="./data/ssst"), + build_input_from_file( + "data/ssst/test_multispectral_seg.txt", + prefix="./data/ssst"), + build_input_from_file( + "data/ssst/test_optical_det.txt", + prefix="./data/ssst", + label_list="data/ssst/labels_det.txt"), + build_input_from_file( + "data/ssst/test_sar_det.txt", + prefix="./data/ssst", + label_list="data/ssst/labels_det.txt"), + build_input_from_file( + "data/ssst/test_multispectral_det.txt", + prefix="./data/ssst", + label_list="data/ssst/labels_det.txt"), + build_input_from_file( + "data/ssst/test_det_coco.txt", + prefix="./data/ssst"), + build_input_from_file( + "data/ssmt/test_mixed_binary.txt", + prefix="./data/ssmt"), + 
build_input_from_file( + "data/ssmt/test_mixed_multiclass.txt", + prefix="./data/ssmt"), + build_input_from_file( + "data/ssmt/test_mixed_multitask.txt", + prefix="./data/ssmt") + ] # yapf: disable + + def test_DecodeImg(self): + decoder = T.DecodeImg(to_rgb=True) + for i, input in enumerate(self.inputs): + with self.subTest(i=i): + for sample in input: + sample = decoder(sample) + # Check type + self.assertIsInstance(sample['image'], np.ndarray) + if 'mask' in sample: + self.assertIsInstance(sample['mask'], np.ndarray) + if 'aux_masks' in sample: + for aux_mask in sample['aux_masks']: + self.assertIsInstance(aux_mask, np.ndarray) + # TODO: Check dtype + + def test_Resize(self): + TARGET_SIZE = (128, 128) + + def _in_hook(sample): + self.image_shape = sample['image'].shape + if 'mask' in sample: + self.mask_shape = sample['mask'].shape + self.mask_values = set(sample['mask'].ravel()) + if 'aux_masks' in sample: + self.aux_mask_shapes = [ + aux_mask.shape for aux_mask in sample['aux_masks'] + ] + self.aux_mask_values = [ + set(aux_mask.ravel()) for aux_mask in sample['aux_masks'] + ] + return sample + + def _out_hook_not_keep_ratio(sample): + self.check_output_equal(sample['image'].shape[:2], TARGET_SIZE) + if 'image2' in sample: + self.check_output_equal(sample['image2'].shape[:2], TARGET_SIZE) + if 'mask' in sample: + self.check_output_equal(sample['mask'].shape[:2], TARGET_SIZE) + self.assertLessEqual( + set(sample['mask'].ravel()), self.mask_values) + if 'aux_masks' in sample: + for aux_mask in sample['aux_masks']: + self.check_output_equal(aux_mask.shape[:2], TARGET_SIZE) + for aux_mask, amv in zip(sample['aux_masks'], + self.aux_mask_values): + self.assertLessEqual(set(aux_mask.ravel()), amv) + # TODO: Test gt_bbox and gt_poly + return sample + + def _out_hook_keep_ratio(sample): + def __check_ratio(shape1, shape2): + self.check_output_equal(shape1[0] / shape1[1], + shape2[0] / shape2[1]) + + __check_ratio(sample['image'].shape, self.image_shape) + if 'image2' in sample: + __check_ratio(sample['image2'].shape, self.image_shape) + if 'mask' in sample: + __check_ratio(sample['mask'].shape, self.mask_shape) + if 'aux_masks' in sample: + for aux_mask, ori_aux_mask_shape in zip(sample['aux_masks'], + self.aux_mask_shapes): + __check_ratio(aux_mask.shape, ori_aux_mask_shape) + # TODO: Test gt_bbox and gt_poly + return sample + + test_func_not_keep_ratio = make_test_func( + T.Resize, + in_hook=_in_hook, + out_hook=_out_hook_not_keep_ratio, + target_size=TARGET_SIZE, + keep_ratio=False) + test_func_not_keep_ratio(self) + test_func_keep_ratio = make_test_func( + T.Resize, + in_hook=_in_hook, + out_hook=_out_hook_keep_ratio, + target_size=TARGET_SIZE, + keep_ratio=True) + test_func_keep_ratio(self) + + def test_RandomFlipOrRotate(self): + def _in_hook(sample): + if 'image2' in sample: + self.im_diff = ( + sample['image'] - sample['image2']).astype('float64') + elif 'mask' in sample: + self.im_diff = ( + sample['image'][..., 0] - sample['mask']).astype('float64') + return sample + + def _out_hook(sample): + im_diff = None + if 'image2' in sample: + im_diff = (sample['image'] - sample['image2']).astype('float64') + elif 'mask' in sample: + im_diff = ( + sample['image'][..., 0] - sample['mask']).astype('float64') + if im_diff is not None: + self.check_output_equal(im_diff.max(), self.im_diff.max()) + self.check_output_equal(im_diff.min(), self.im_diff.min()) + return sample + + test_func = make_test_func( + T.RandomFlipOrRotate, + in_hook=_in_hook, + out_hook=_out_hook, + filter_=_filter_no_det) + 
test_func(self) + + +class TestCompose(CpuCommonTest): + pass + + +class TestArrange(CpuCommonTest): + pass diff --git a/tools/coco2mask.py b/tools/coco2mask.py index a380dd0d..fae007d5 100644 --- a/tools/coco2mask.py +++ b/tools/coco2mask.py @@ -25,7 +25,7 @@ from tqdm import tqdm from PIL import Image -from utils import timer +from utils import time_it def _mkdir_p(path): @@ -69,30 +69,30 @@ def _read_geojson(json_path): return annotations, sizes -@timer -def convert_data(raw_folder, end_folder): +@time_it +def convert_data(raw_dir, end_dir): print("-- Initializing --") - img_folder = osp.join(raw_folder, "images") - save_img_folder = osp.join(end_folder, "img") - save_lab_folder = osp.join(end_folder, "gt") - _mkdir_p(save_img_folder) - _mkdir_p(save_lab_folder) - names = os.listdir(img_folder) + img_dir = osp.join(raw_dir, "images") + save_img_dir = osp.join(end_dir, "img") + save_lab_dir = osp.join(end_dir, "gt") + _mkdir_p(save_img_dir) + _mkdir_p(save_lab_dir) + names = os.listdir(img_dir) print("-- Loading annotations --") anns = {} sizes = {} - jsons = glob.glob(osp.join(raw_folder, "*.json")) + jsons = glob.glob(osp.join(raw_dir, "*.json")) for json in jsons: j_ann, j_size = _read_geojson(json) anns.update(j_ann) sizes.update(j_size) - print("-- Converting datas --") + print("-- Converting data --") for k in tqdm(names): # for k in tqdm(anns.keys()): - img_path = osp.join(img_folder, k) - img_save_path = osp.join(save_img_folder, k) + img_path = osp.join(img_dir, k) + img_save_path = osp.join(save_img_dir, k) ext = "." + k.split(".")[-1] - lab_save_path = osp.join(save_lab_folder, k.replace(ext, ".png")) + lab_save_path = osp.join(save_lab_dir, k.replace(ext, ".png")) shutil.copy(img_path, img_save_path) if k in anns.keys(): _save_mask(anns[k], sizes[k], lab_save_path) @@ -101,12 +101,12 @@ def convert_data(raw_folder, end_folder): lab_save_path) -parser = argparse.ArgumentParser(description="input parameters") -parser.add_argument("--raw_folder", type=str, required=True, \ - help="The folder path about original data, where `images` saves the original image, `annotation.json` saves the corresponding annotation information.") -parser.add_argument("--save_folder", type=str, required=True, \ - help="The folder path to save the results, where `img` saves the image and `gt` saves the label.") +parser = argparse.ArgumentParser() +parser.add_argument("--raw_dir", type=str, required=True, \ + help="Directory that contains original data, where `images` stores the original image and `annotation.json` stores the corresponding annotation information.") +parser.add_argument("--save_dir", type=str, required=True, \ + help="Directory to save the results, where `img` stores the image and `gt` stores the label.") if __name__ == "__main__": args = parser.parse_args() - convert_data(args.raw_folder, args.save_folder) + convert_data(args.raw_dir, args.save_dir) diff --git a/tools/coco_tools/json_AnnoSta.py b/tools/coco_tools/json_AnnoSta.py index 1a9388ca..57515bd4 100644 --- a/tools/coco_tools/json_AnnoSta.py +++ b/tools/coco_tools/json_AnnoSta.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
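To make the coco2mask.py changes above concrete: after the rename, the converter is driven with --raw_dir/--save_dir instead of --raw_folder/--save_folder. A hypothetical invocation (the dataset paths below are placeholders, not files in this repository):

import subprocess

# raw/ is expected to contain an images/ folder plus *.json annotations;
# the outputs land in out/img and out/gt, per the help strings above.
subprocess.run(
    ["python", "tools/coco2mask.py", "--raw_dir", "./raw",
     "--save_dir", "./out"],
    check=True)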
- ''' @File Description: # Statistics over the annotations of a JSON file: writes a summary CSV and plots the distributions of bounding-box shape, box aspect ratio, box start position, box end position, object category, and number of objects per image @@ -35,11 +34,13 @@ import seaborn as sns import matplotlib.pyplot as plt +shp_rate_bins = [ + 0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 1.1, 1.2, 1.3, 1.4, 1.5, + 1.6, 1.7, 1.8, 1.9, 2, 2.1, 2.2, 2.4, 2.6, 3, 3.5, 4, 5 +] -shp_rate_bins = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 1.1, 1.2, 1.3, 1.4, 1.5, 1.6, 1.7, 1.8, 1.9, 2, 2.1, - 2.2, 2.4, 2.6, 3, 3.5, 4, 5] -def check_dir(check_path,show=True): +def check_dir(check_path, show=True): if os.path.isdir(check_path): check_directory = check_path else: @@ -47,10 +48,12 @@ if not os.path.exists(check_directory): os.makedirs(check_directory) if show: - print('make dir:',check_directory) + print('make dir:', check_directory) + -def js_anno_sta(js_path, csv_path, png_shape_path, png_shapeRate_path, png_pos_path, png_posEnd_path, png_cat_path, - png_objNum_path, get_relative, image_keyname, anno_keyname): +def js_anno_sta(js_path, csv_path, png_shape_path, png_shapeRate_path, + png_pos_path, png_posEnd_path, png_cat_path, png_objNum_path, + get_relative, image_keyname, anno_keyname): print('json read...\n') with open(js_path, 'r') as load_f: data = json.load(load_f) @@ -58,10 +61,15 @@ df_img = pd.DataFrame(data[image_keyname]) sns.jointplot('height', 'width', data=df_img, kind='hex') plt.close() - df_img = df_img.rename(columns={"id": "image_id", "height": "image_height", "width": "image_width"}) + df_img = df_img.rename(columns={ + "id": "image_id", + "height": "image_height", + "width": "image_width" + }) df_anno = pd.DataFrame(data[anno_keyname]) - df_anno[['pox_x', 'pox_y', 'width', 'height']] = pd.DataFrame(df_anno['bbox'].values.tolist()) + df_anno[['pox_x', 'pox_y', 'width', 'height']] = pd.DataFrame(df_anno[ + 'bbox'].values.tolist()) df_anno['width'] = df_anno['width'].astype(int) df_anno['height'] = df_anno['height'].astype(int) @@ -84,8 +92,11 @@ if png_shapeRate_path is not None: check_dir(png_shapeRate_path) plt.figure(figsize=(12, 8)) - df_merge['shape_rate'] = (df_merge['width'] / df_merge['height']).round(1) - df_merge['shape_rate'].value_counts(sort=False, bins=shp_rate_bins).plot(kind='bar', title='images shape rate') + df_merge['shape_rate'] = (df_merge['width'] / + df_merge['height']).round(1) + df_merge['shape_rate'].value_counts( + sort=False, bins=shp_rate_bins).plot( + kind='bar', title='images shape rate') plt.xticks(rotation=20) plt.savefig(png_shapeRate_path) plt.close() @@ -115,8 +126,10 @@ print('png save to', png_posEnd_path) if get_relative: png_posEndR_path = png_posEnd_path.replace('.png', '_Relative.png') - df_merge['pox_y_endR'] = df_merge['pox_y_end'] / df_merge['image_height'] - df_merge['pox_x_endR'] = df_merge['pox_x_end'] / df_merge['image_width'] + df_merge['pox_y_endR'] = df_merge['pox_y_end'] / df_merge[ + 'image_height'] + df_merge['pox_x_endR'] = df_merge['pox_x_end'] / df_merge[ + 'image_width'] sns.jointplot('pox_y_endR', 'pox_x_endR', data=df_merge, kind='hex') plt.savefig(png_posEndR_path) plt.close() @@ -125,7 +138,8 @@ if png_cat_path is not None: check_dir(png_cat_path) plt.figure(figsize=(12, 8)) -
df_merge['category_id'].value_counts().sort_index().plot(kind='bar', title='obj category') + df_merge['category_id'].value_counts().sort_index().plot( + kind='bar', title='obj category') plt.savefig(png_cat_path) plt.close() print('png save to', png_cat_path) @@ -133,7 +147,8 @@ if png_objNum_path is not None: check_dir(png_objNum_path) plt.figure(figsize=(12, 8)) - df_merge['image_id'].value_counts().value_counts().sort_index().plot(kind='bar', title='obj number per image') + df_merge['image_id'].value_counts().value_counts().sort_index().plot( + kind='bar', title='obj number per image') # df_merge['image_id'].value_counts().value_counts(bins=np.linspace(1,31,16)).sort_index().plot(kind='bar', title='obj number per image') plt.xticks(rotation=20) plt.savefig(png_objNum_path) @@ -147,37 +162,81 @@ def get_args(): - parser = argparse.ArgumentParser(description='Json Images Infomation Statistic') + parser = argparse.ArgumentParser( + description='Json Images Information Statistic') # parameters - parser.add_argument('--json_path', type=str, - help='json path to statistic images information') - parser.add_argument('--csv_path', type=str, default=None, - help='csv path to save statistic images information, default None, do not save') - - parser.add_argument('--png_shape_path', type=str, default=None, - help='png path to save statistic images shape information, default None, do not save') - parser.add_argument('--png_shapeRate_path', type=str, default=None, - help='png path to save statistic images shape rate information, default None, do not save') - - parser.add_argument('--png_pos_path', type=str, default=None, - help='png path to save statistic pos information, default None, do not save') - parser.add_argument('--png_posEnd_path', type=str, default=None, - help='png path to save statistic end pos information, default None, do not save') - - parser.add_argument('--png_cat_path', type=str, default=None, - help='png path to save statistic category information, default None, do not save') - parser.add_argument('--png_objNum_path', type=str, default=None, - help='png path to save statistic images object number information, default None, do not save') - - parser.add_argument('--get_relative', type=bool, default=True, - help='if True, get relative result') - parser.add_argument('--image_keyname', type=str, default='images', - help='image key name in json, default images') - parser.add_argument('--anno_keyname', type=str, default='annotations', - help='annotation key name in json, default annotations') - parser.add_argument('-Args_show', '--Args_show', type=bool, default=True, - help='Args_show(default: True), if True, show args info') + parser.add_argument( + '--json_path', + type=str, + help='json path to statistic images information') + parser.add_argument( + '--csv_path', + type=str, + default=None, + help='csv path to save statistic images information, default None, do not save' + ) + + parser.add_argument( + '--png_shape_path', + type=str, + default=None, + help='png path to save statistic images shape information, default None, do not save' + ) + parser.add_argument( + '--png_shapeRate_path', + type=str, + default=None, + help='png path to save statistic images shape rate information, default None, do not save' + ) + + parser.add_argument( + '--png_pos_path', + type=str, + default=None, + help='png path to save statistic pos 
information, default None, do not save' + ) + parser.add_argument( + '--png_posEnd_path', + type=str, + default=None, + help='png path to save statistic end pos information, default None, do not save' + ) + + parser.add_argument( + '--png_cat_path', + type=str, + default=None, + help='png path to save statistic category information, default None, do not save' + ) + parser.add_argument( + '--png_objNum_path', + type=str, + default=None, + help='png path to save statistic images object number information, default None, do not save' + ) + + parser.add_argument( + '--get_relative', + type=bool, + default=True, + help='if True, get relative result') + parser.add_argument( + '--image_keyname', + type=str, + default='images', + help='image key name in json, default images') + parser.add_argument( + '--anno_keyname', + type=str, + default='annotations', + help='annotation key name in json, default annotations') + parser.add_argument( + '-Args_show', + '--Args_show', + type=bool, + default=True, + help='Args_show(default: True), if True, show args info') args = parser.parse_args() @@ -191,8 +250,7 @@ def get_args(): if __name__ == '__main__': args = get_args() - js_anno_sta(args.json_path, args.csv_path, args.png_shape_path, args.png_shapeRate_path, - args.png_pos_path, args.png_posEnd_path, args.png_cat_path, args.png_objNum_path, + js_anno_sta(args.json_path, args.csv_path, args.png_shape_path, + args.png_shapeRate_path, args.png_pos_path, + args.png_posEnd_path, args.png_cat_path, args.png_objNum_path, args.get_relative, args.image_keyname, args.anno_keyname) - - diff --git a/tools/coco_tools/json_Img2Json.py b/tools/coco_tools/json_Img2Json.py index 72786173..fd3db4a2 100644 --- a/tools/coco_tools/json_Img2Json.py +++ b/tools/coco_tools/json_Img2Json.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
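One caveat worth noting about the argparse blocks above, which this reformatting keeps as-is: `type=bool` does not parse strings the way it appears to, because `bool('False')` is `True` in Python; any non-empty string parses as True. If `--get_relative` or `--Args_show` are ever passed explicitly on the command line, a converter along these lines is the usual remedy (a suggestion, not part of this patch):

import argparse

def str2bool(v):
    # argparse passes the raw string here; map common spellings to booleans.
    if isinstance(v, bool):
        return v
    if v.lower() in ('yes', 'true', 't', '1'):
        return True
    if v.lower() in ('no', 'false', 'f', '0'):
        return False
    raise argparse.ArgumentTypeError('Boolean value expected, got %r.' % v)

parser = argparse.ArgumentParser()
parser.add_argument('--get_relative', type=str2bool, default=True)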
- ''' @File Description: # Generates test.json from the folder of test images @@ -27,7 +26,8 @@ from tqdm import tqdm -def js_test(test_image_path, js_train_path, js_test_path, image_keyname, cat_keyname): +def js_test(test_image_path, js_train_path, js_test_path, image_keyname, + cat_keyname): print('Get Test'.center(100, '-')) print() @@ -63,18 +63,29 @@ def get_args(): parser = argparse.ArgumentParser(description='Get Test Json') # parameters - parser.add_argument('--test_image_path', type=str, - help='test image path') - parser.add_argument('--json_train_path', type=str, - help='train json path, provide categories information') - parser.add_argument('--json_test_path', type=str, - help='test json path to save') - parser.add_argument('--image_keyname', type=str, default='images', - help='image key name in json, default images') - parser.add_argument('--cat_keyname', type=str, default='categories', - help='categories key name in json, default categories') - parser.add_argument('-Args_show', '--Args_show', type=bool, default=True, - help='Args_show(default: True), if True, show args info') + parser.add_argument('--test_image_path', type=str, help='test image path') + parser.add_argument( + '--json_train_path', + type=str, + help='train json path, provide categories information') + parser.add_argument( + '--json_test_path', type=str, help='test json path to save') + parser.add_argument( + '--image_keyname', + type=str, + default='images', + help='image key name in json, default images') + parser.add_argument( + '--cat_keyname', + type=str, + default='categories', + help='categories key name in json, default categories') + parser.add_argument( + '-Args_show', + '--Args_show', + type=bool, + default=True, + help='Args_show(default: True), if True, show args info') args = parser.parse_args() @@ -88,7 +99,5 @@ if __name__ == '__main__': args = get_args() - js_test(args.test_image_path, args.json_train_path, args.json_test_path, args.image_keyname, args.cat_keyname) - - - + js_test(args.test_image_path, args.json_train_path, args.json_test_path, + args.image_keyname, args.cat_keyname) diff --git a/tools/coco_tools/json_ImgSta.py b/tools/coco_tools/json_ImgSta.py index 0eae85e6..accf888b 100644 --- a/tools/coco_tools/json_ImgSta.py +++ b/tools/coco_tools/json_ImgSta.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License.
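For readers skimming the json_Img2Json.py hunk above: conceptually, js_test assembles a COCO-style test annotation file whose `images` entries are derived from the test image folder and whose `categories` are copied from the training JSON so that category ids stay consistent. A rough sketch of that idea only (simplified; the real script differs in details such as recording image sizes):

import json
import os

def build_test_json(test_image_dir, train_json_path, out_path):
    with open(train_json_path, 'r') as f:
        train = json.load(f)
    images = [{'id': i, 'file_name': name}
              for i, name in enumerate(sorted(os.listdir(test_image_dir)))]
    # Reuse the training categories so ids stay aligned across splits.
    with open(out_path, 'w') as f:
        json.dump({'images': images, 'categories': train['categories']}, f)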
- ''' @File Description: # Collects statistics on the images field of a JSON file, writes them to a CSV, and plots 2-D distributions of image shape and image aspect ratio @@ -30,7 +29,8 @@ import seaborn as sns import matplotlib.pyplot as plt -def check_dir(check_path,show=True): + +def check_dir(check_path, show=True): if os.path.isdir(check_path): check_directory = check_path else: @@ -38,9 +38,11 @@ if not os.path.exists(check_directory): os.makedirs(check_directory) if show: - print('make dir:',check_directory) + print('make dir:', check_directory) -def js_img_sta(js_path, csv_path, png_shape_path, png_shapeRate_path, image_keyname): + +def js_img_sta(js_path, csv_path, png_shape_path, png_shapeRate_path, + image_keyname): print('json read...\n') with open(js_path, 'r') as load_f: data = json.load(load_f) @@ -56,7 +58,8 @@ if png_shapeRate_path is not None: check_dir(png_shapeRate_path) df_img['shape_rate'] = (df_img['width'] / df_img['height']).round(1) - df_img['shape_rate'].value_counts().sort_index().plot(kind='bar', title='images shape rate') + df_img['shape_rate'].value_counts().sort_index().plot( + kind='bar', title='images shape rate') plt.savefig(png_shapeRate_path) plt.close() print('png save to', png_shapeRate_path) @@ -68,21 +71,43 @@ def get_args(): - parser = argparse.ArgumentParser(description='Json Images Infomation Statistic') + parser = argparse.ArgumentParser( + description='Json Images Information Statistic') # parameters - parser.add_argument('--json_path', type=str, - help='json path to statistic images information') - parser.add_argument('--csv_path', type=str, default=None, - help='csv path to save statistic images information, default None, do not save') - parser.add_argument('--png_shape_path', type=str, default=None, - help='png path to save statistic images shape information, default None, do not save') - parser.add_argument('--png_shapeRate_path', type=str, default=None, - help='png path to save statistic images shape rate information, default None, do not save') - parser.add_argument('--image_keyname', type=str, default='images', - help='image key name in json, default images') - parser.add_argument('-Args_show', '--Args_show', type=bool, default=True, - help='Args_show(default: True), if True, show args info') + parser.add_argument( + '--json_path', + type=str, + help='json path to statistic images information') + parser.add_argument( + '--csv_path', + type=str, + default=None, + help='csv path to save statistic images information, default None, do not save' + ) + parser.add_argument( + '--png_shape_path', + type=str, + default=None, + help='png path to save statistic images shape information, default None, do not save' + ) + parser.add_argument( + '--png_shapeRate_path', + type=str, + default=None, + help='png path to save statistic images shape rate information, default None, do not save' + ) + parser.add_argument( + '--image_keyname', + type=str, + default='images', + help='image key name in json, default images') + parser.add_argument( + '-Args_show', + '--Args_show', + type=bool, + default=True, + help='Args_show(default: True), if True, show args info') args = parser.parse_args() @@ -96,5 +121,5 @@ if __name__ == '__main__': args = get_args() - js_img_sta(args.json_path, args.csv_path, args.png_shape_path, args.png_shapeRate_path, args.image_keyname) - + js_img_sta(args.json_path, args.csv_path, args.png_shape_path, + 
args.png_shapeRate_path, args.image_keyname) diff --git a/tools/coco_tools/json_InfoShow.py b/tools/coco_tools/json_InfoShow.py index 4532ec1c..f134a89a 100644 --- a/tools/coco_tools/json_InfoShow.py +++ b/tools/coco_tools/json_InfoShow.py @@ -11,7 +11,6 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. - ''' @File Description: # Prints basic information about a JSON file @@ -25,49 +24,58 @@ def js_show(js_path, show_num): - print('Info'.center(100,'-')) + print('Info'.center(100, '-')) print('json read...') with open(js_path, 'r') as load_f: data = json.load(load_f) - - print('json keys:',data.keys(),'\n') + print('json keys:', data.keys(), '\n') for k, v in data.items(): print(k.center(50, '*')) - show_num_t = show_num if len(v)>show_num else len(v) + show_num_t = show_num if len(v) > show_num else len(v) if isinstance(v, list): - print(' Content Type: list\n Total Length: %d\n First %d record:\n'%(len(v),show_num_t)) + print(' Content Type: list\n Total Length: %d\n First %d record:\n' + % (len(v), show_num_t)) for i in range(show_num_t): print(v[i]) elif isinstance(v, dict): - print(' Content Type: dict\n Total Length: %d\n First %d record:\n'%(len(v),show_num_t)) - for i,(kv,vv) in enumerate(v.items()): - if i int: if type_name in ["bool", "uint8"]: @@ -53,14 +52,16 @@ def __init__(self, gdal_obj: Optional[gdal.Dataset]=None, band_list: Union[List[int], Tuple[int], None]=None, to_uint8: bool=False) -> None: - """ Class of read raster. + """ + Raster reader. + + Args: - path (Optional[str]): The path of raster. - gdal_obj (Optional[Any], optional): The object of GDAL. Defaults to None. + path (Optional[str]): Path of raster file. + gdal_obj (Optional[Any], optional): GDAL dataset. Defaults to None. band_list (Union[List[int], Tuple[int], None], optional): - band list (start with 1) or None (all of bands). Defaults to None. + Select a set of bands (the band index starts from 1) or None (read all bands). Defaults to None. to_uint8 (bool, optional): - Convert uint8 or return raw data. Defaults to False. + Whether to convert data type to uint8. Defaults to False. """ super(Raster, self).__init__() if path is not None: @@ -75,25 +76,28 @@ # https://www.osgeo.cn/gdal/drivers/raster/index.html self._src_data = gdal.Open(path) except: - raise TypeError( - "Unsupported data format: `{}`".format(self.ext_type)) + raise TypeError("Unsupported data format: `{}`".format( + self.ext_type)) else: raise ValueError("The path {0} does not exist.".format(path)) else: if gdal_obj is not None: self._src_data = gdal_obj else: - raise ValueError("At least one of `path` and `gdal_obj` is not None.") + raise ValueError( + "At least one of `path` and `gdal_obj` must not be None.") self.to_uint8 = to_uint8 self._getInfo() self.setBands(band_list) self._getType() def setBands(self, band_list: Union[List[int], Tuple[int], None]) -> None: - """ Set band of data. + """ + Set bands of data. + + Args: + band_list (Union[List[int], Tuple[int], None]): + Select a set of bands (the band index starts from 1) or None (read all bands). Defaults to None. 
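+
+        Examples:
+            A hypothetical session with this reader (the file name is a
+            placeholder; getArray is defined further down in this hunk)::
+
+                raster = Raster('demo.tif', to_uint8=True)
+                raster.setBands([1, 2, 3])  # keep only the first three bands
+                block = raster.getArray([0, 0], [512, 512])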
""" if band_list is not None: if len(band_list) > self.bands: @@ -105,16 +109,19 @@ def setBands(self, band_list: Union[List[int], Tuple[int], None]) -> None: format(str(self.bands))) self.band_list = band_list - def getArray( - self, - start_loc: Union[List[int], Tuple[int, int], None]=None, - block_size: Union[List[int], Tuple[int, int]]=[512, 512]) -> np.ndarray: - """ Get ndarray data + def getArray(self, + start_loc: Union[List[int], Tuple[int, int], None]=None, + block_size: Union[List[int], Tuple[int, int]]=[512, 512] + ) -> np.ndarray: + """ + Fetch data in a ndarray. + Args: start_loc (Union[List[int], Tuple[int], None], optional): - Coordinates of the upper left corner of the block, if None means return full image. + Coordinates of the upper left corner of the block. None value means returning full image. block_size (Union[List[int], Tuple[int]], optional): Block size. Defaults to [512, 512]. + Returns: np.ndarray: data's ndarray. """ @@ -144,7 +151,7 @@ def _getInfo(self) -> None: self.bands = 1 self.geot = None self.proj = None - + def _getType(self) -> None: d_name = self.getArray([0, 0], [1, 1]).dtype.name self.datatype = _get_type(d_name) @@ -159,9 +166,9 @@ def _getNumpy(self): ima = np.stack(band_array, axis=0) return ima - def _getArray( - self, - window: Union[None, List[int], Tuple[int, int, int, int]]=None) -> np.ndarray: + def _getArray(self, + window: Union[None, List[int], Tuple[int, int, int, int]]=None + ) -> np.ndarray: if self._src_data is None: raise ValueError("The raster is None.") if window is not None: @@ -193,10 +200,10 @@ def _getArray( ima = raster2uint8(ima) return ima - def _getBlock( - self, - start_loc: Union[List[int], Tuple[int, int]], - block_size: Union[List[int], Tuple[int, int]]=[512, 512]) -> np.ndarray: + def _getBlock(self, + start_loc: Union[List[int], Tuple[int, int]], + block_size: Union[List[int], Tuple[int, int]]=[512, 512] + ) -> np.ndarray: if len(start_loc) != 2 or len(block_size) != 2: raise ValueError("The length start_loc/block_size must be 2.") xoff, yoff = start_loc @@ -220,9 +227,9 @@ def _getBlock( return tmp -def save_geotiff(image: np.ndarray, - save_path: str, - proj: str, +def save_geotiff(image: np.ndarray, + save_path: str, + proj: str, geotf: Tuple, use_type: Optional[int]=None, clear_ds: bool=True) -> None: diff --git a/tools/utils/timer.py b/tools/utils/timer.py index 568d5890..7e6517cd 100644 --- a/tools/utils/timer.py +++ b/tools/utils/timer.py @@ -16,11 +16,12 @@ from functools import wraps -def timer(func): +def time_it(func): @wraps(func) - def wrapper(*args,**kwargs): + def wrapper(*args, **kwargs): start_time = time.time() - result = func(*args,**kwargs) - print("Total time: {0}.".format(time.time() - start_time)) + result = func(*args, **kwargs) + print("Total time consumed: {0}.".format(time.time() - start_time)) return result + return wrapper diff --git a/tools/utils/vector.py b/tools/utils/vector.py index f6bb2989..4d18338a 100644 --- a/tools/utils/vector.py +++ b/tools/utils/vector.py @@ -22,32 +22,27 @@ import osr -def vector_translate(geojson_path: str, +def translate_vector(geojson_path: str, wo_wkt: str, g_type: str="POLYGON", dim: str="XY") -> str: - ogr.RegisterAll() - gdal.SetConfigOption("GDAL_FILENAME_IS_UTF8", "YES") - data = ogr.Open(geojson_path) - layer = data.GetLayer() - spatial = layer.GetSpatialRef() - layerName = layer.GetName() - data.Destroy() - dstSRS = osr.SpatialReference() - dstSRS.ImportFromWkt(wo_wkt) - ext = "." 
+ geojson_path.split(".")[-1] - save_path = geojson_path.replace(ext, ("_tmp" + ext)) - options = gdal.VectorTranslateOptions( - srcSRS=spatial, - dstSRS=dstSRS, - reproject=True, - layerName=layerName, - geometryType=g_type, - dim=dim - ) - gdal.VectorTranslate( - save_path, - srcDS=geojson_path, - options=options - ) - return save_path \ No newline at end of file + ogr.RegisterAll() + gdal.SetConfigOption("GDAL_FILENAME_IS_UTF8", "YES") + data = ogr.Open(geojson_path) + layer = data.GetLayer() + spatial = layer.GetSpatialRef() + layerName = layer.GetName() + data.Destroy() + dstSRS = osr.SpatialReference() + dstSRS.ImportFromWkt(wo_wkt) + ext = "." + geojson_path.split(".")[-1] + save_path = geojson_path.replace(ext, ("_tmp" + ext)) + options = gdal.VectorTranslateOptions( + srcSRS=spatial, + dstSRS=dstSRS, + reproject=True, + layerName=layerName, + geometryType=g_type, + dim=dim) + gdal.VectorTranslate(save_path, srcDS=geojson_path, options=options) + return save_path diff --git a/tutorials/train/change_detection/stanet.py b/tutorials/train/change_detection/stanet.py index a2da40fb..8332995e 100644 --- a/tutorials/train/change_detection/stanet.py +++ b/tutorials/train/change_detection/stanet.py @@ -85,4 +85,4 @@ # Whether to enable VisualDL logging use_vdl=True, # Resume training from a given checkpoint - resume_checkpoint=None) \ No newline at end of file + resume_checkpoint=None) diff --git a/tutorials/train/classification/condensenetv2_b_rs_mul.py b/tutorials/train/classification/condensenetv2_b_rs_mul.py index c581cbe9..95e60272 100644 --- a/tutorials/train/classification/condensenetv2_b_rs_mul.py +++ b/tutorials/train/classification/condensenetv2_b_rs_mul.py @@ -3,7 +3,7 @@ # Define the transforms for training and validation train_transforms = T.Compose([ - T.BandSelecting([5, 10, 15, 20, 25]), # for tet + T.SelectBand([5, 10, 15, 20, 25]), # for test T.Resize(target_size=224), T.RandomHorizontalFlip(), T.Normalize( @@ -11,7 +11,7 @@ ]) eval_transforms = T.Compose([ - T.BandSelecting([5, 10, 15, 20, 25]), + T.SelectBand([5, 10, 15, 20, 25]), T.Resize(target_size=224), T.Normalize( mean=[0.5, 0.5, 0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5, 0.5, 0.5]), @@ -46,4 +46,4 @@ train_batch_size=4, eval_dataset=eval_dataset, learning_rate=3e-4, - save_dir='output/condensenetv2_b') \ No newline at end of file + save_dir='output/condensenetv2_b')
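Finally, note that the two tutorial hunks above track the operator rename from T.BandSelecting to T.SelectBand; any external configs need the same one-line change. The training pipeline from the tutorial now reads:

import paddlers.transforms as T

train_transforms = T.Compose([
    T.SelectBand([5, 10, 15, 20, 25]),  # renamed from BandSelecting
    T.Resize(target_size=224),
    T.RandomHorizontalFlip(),
    T.Normalize(
        mean=[0.5, 0.5, 0.5, 0.5, 0.5], std=[0.5, 0.5, 0.5, 0.5, 0.5]),
])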