diff --git a/configs/rscup/htc_deform_ga.py b/configs/rscup/htc_deform_ga.py new file mode 100644 index 0000000..c2b28d2 --- /dev/null +++ b/configs/rscup/htc_deform_ga.py @@ -0,0 +1,301 @@ +# model settings +fp16 = dict(loss_scale=512.) +model = dict( + type='HybridTaskCascade', + num_stages=3, + pretrained='modelzoo://resnet50', + interleaved=True, + mask_info_flow=True, + backbone=dict( + type='ResNet', + depth=50, + num_stages=4, + out_indices=(0, 1, 2, 3), + frozen_stages=1, + style='pytorch', + dcn=dict( + modulated=False, deformable_groups=1, fallback_on_stride=False), + stage_with_dcn=(False, True, True, True), + ), + neck=dict( + type='FPN', + in_channels=[256, 512, 1024, 2048], + out_channels=256, + num_outs=5), + rpn_head=dict( + type='GARPNHead', + in_channels=256, + feat_channels=256, + octave_base_scale=8, + scales_per_octave=3, + octave_ratios=[0.5, 1.0, 2.0], + anchor_strides=[4, 8, 16, 32, 64], + anchor_base_sizes=None, + anchoring_means=[.0, .0, .0, .0], + anchoring_stds=[0.07, 0.07, 0.14, 0.14], + target_means=(.0, .0, .0, .0), + target_stds=[0.07, 0.07, 0.11, 0.11], + loc_filter_thr=0.01, + loss_loc=dict( + type='FocalLoss', + use_sigmoid=True, + gamma=2.0, + alpha=0.25, + loss_weight=1.0), + loss_shape=dict(type='BoundedIoULoss', beta=0.2, loss_weight=1.0), + loss_cls=dict( + type='CrossEntropyLoss', use_sigmoid=True, loss_weight=1.0), + loss_bbox=dict(type='SmoothL1Loss', beta=1.0, loss_weight=1.0)), + bbox_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', out_size=7, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + bbox_head=[ + dict( + type='SharedFCBBoxHead', + num_fcs=2, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=20, + target_means=[0., 0., 0., 0.], + target_stds=[0.1, 0.1, 0.2, 0.2], + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict( + type='SmoothL1Loss', + beta=1.0, + loss_weight=1.0)), + dict( + type='SharedFCBBoxHead', + num_fcs=2, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=20, + target_means=[0., 0., 0., 0.], + target_stds=[0.05, 0.05, 0.1, 0.1], + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict( + type='SmoothL1Loss', + beta=1.0, + loss_weight=1.0)), + dict( + type='SharedFCBBoxHead', + num_fcs=2, + in_channels=256, + fc_out_channels=1024, + roi_feat_size=7, + num_classes=20, + target_means=[0., 0., 0., 0.], + target_stds=[0.033, 0.033, 0.067, 0.067], + reg_class_agnostic=True, + loss_cls=dict( + type='CrossEntropyLoss', + use_sigmoid=False, + loss_weight=1.0), + loss_bbox=dict( + type='SmoothL1Loss', + beta=1.0, + loss_weight=1.0)) + ], + mask_roi_extractor=dict( + type='SingleRoIExtractor', + roi_layer=dict(type='RoIAlign', out_size=14, sample_num=2), + out_channels=256, + featmap_strides=[4, 8, 16, 32]), + mask_head=dict( + type='HTCMaskHead', + num_convs=4, + in_channels=256, + conv_out_channels=256, + num_classes=20, + loss_mask=dict( + type='CrossEntropyLoss', use_mask=True, loss_weight=1.0))) +# model training and testing settings +train_cfg = dict( + rpn=dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.7, + neg_iou_thr=0.3, + min_pos_iou=0.3, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=256, + pos_fraction=0.5, + neg_pos_ub=-1, + add_gt_as_proposals=False), + allowed_border=0, + pos_weight=-1, + debug=False), + rpn_proposal=dict( + nms_across_levels=False, + nms_pre=2000, + nms_post=2000, + max_num=1000, + nms_thr=0.7, + min_bbox_size=0), + rcnn=[ + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.4, + neg_iou_thr=0.4, + min_pos_iou=0.4, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False), + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.5, + neg_iou_thr=0.5, + min_pos_iou=0.5, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False), + dict( + assigner=dict( + type='MaxIoUAssigner', + pos_iou_thr=0.6, + neg_iou_thr=0.6, + min_pos_iou=0.6, + ignore_iof_thr=-1), + sampler=dict( + type='RandomSampler', + num=512, + pos_fraction=0.25, + neg_pos_ub=-1, + add_gt_as_proposals=True), + mask_size=28, + pos_weight=-1, + debug=False) + ], + stage_loss_weights=[0.5, 1, 0.25]) +test_cfg = dict( + rpn=dict( + nms_across_levels=False, + nms_pre=1000, + nms_post=1000, + max_num=1000, + nms_thr=0.7, + min_bbox_size=0), + rcnn=dict( + score_thr=0.05, + nms=dict(type='nms', iou_thr=0.5), + max_per_img=1000, + mask_thr_binary=0.5), + keep_all_stages=False) +# dataset settings +dataset_type = 'CocoDataset' +data_root = './data/rscup/' +aug_root = "./data/rscup/aug/" +other_aug_root = "./data/rscup/otheraug/" +img_norm_cfg = dict( + mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=True) +data = dict( + imgs_per_gpu=6, + workers_per_gpu=4, + train=dict( + type=dataset_type, + ann_file=(data_root + 'annotation/annos_rscup_train.json', + aug_root + 'annos_rscup_airport.json', + other_aug_root + "annos_rscup_baseball-diamond.json", + other_aug_root + "annos_rscup_basketball-court.json", + other_aug_root + "annos_rscup_container-crane.json", + other_aug_root + "annos_rscup_helicopter.json", + other_aug_root + "annos_rscup_helipad.json", + other_aug_root + "annos_rscup_helipad_ship.json", + other_aug_root + "annos_rscup_roundabout.json", + other_aug_root + "annos_rscup_soccer-ball-field_ground-track-field.json", + ), + img_prefix=(data_root + 'train/', + aug_root + "airport/", + other_aug_root + "baseball-diamond", + other_aug_root + "basketball-court", + other_aug_root + "container-crane", + other_aug_root + "helicopter", + other_aug_root + "helipad", + other_aug_root + "helipad_ship", + other_aug_root + "roundabout", + other_aug_root + "soccer-ball-field_ground-track-field"), + img_scale=(512, 512), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0.5, + with_mask=True, + with_crowd=True, + with_label=True), + val=dict( + type=dataset_type, + ann_file=data_root + 'annotation/annos_rscup_val.json', + img_prefix=data_root + 'val/', + img_scale=(512, 512), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0, + with_mask=True, + with_crowd=True, + with_label=True), + test=dict( + type=dataset_type, + ann_file='./data/rscup/debug.json', + img_prefix='./data/rscup/debug/', + img_scale=(512, 512), + img_norm_cfg=img_norm_cfg, + size_divisor=32, + flip_ratio=0, + with_mask=True, + with_label=False, + test_mode=True)) +# optimizer +optimizer = dict(type='SGD', lr=1e-2, momentum=0.9, weight_decay=0.0001) +optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2)) +# learning policy +lr_config = dict( + policy='step', + warmup='linear', + warmup_iters=500, + warmup_ratio=1.0 / 3, + step=[8, 11]) +checkpoint_config = dict(interval=1) +# yapf:disable +log_config = dict( + interval=50, + hooks=[ + dict(type='TextLoggerHook'), + dict(type='TensorboardLoggerHook') + ]) +# yapf:enable +# runtime settings +total_epochs = 12 +dist_params = dict(backend='nccl') +log_level = 'INFO' +work_dir = './work_dirs/htc_deform_cas_newdata' +load_from = None +resume_from = None +workflow = [('train', 1)] diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..c0920d9 --- /dev/null +++ b/setup.py @@ -0,0 +1,199 @@ +import os +import platform +import subprocess +import time +from setuptools import Extension, find_packages, setup + +import numpy as np +from Cython.Build import cythonize +from torch.utils.cpp_extension import BuildExtension, CUDAExtension + + +def readme(): + with open('README.md', encoding='utf-8') as f: + content = f.read() + return content + + +MAJOR = 0 +MINOR = 6 +PATCH = 0 +SUFFIX = '' +SHORT_VERSION = '{}.{}.{}{}'.format(MAJOR, MINOR, PATCH, SUFFIX) + +version_file = 'mmdet/version.py' + + +def get_git_hash(): + + def _minimal_ext_cmd(cmd): + # construct minimal environment + env = {} + for k in ['SYSTEMROOT', 'PATH', 'HOME']: + v = os.environ.get(k) + if v is not None: + env[k] = v + # LANGUAGE is used on win32 + env['LANGUAGE'] = 'C' + env['LANG'] = 'C' + env['LC_ALL'] = 'C' + out = subprocess.Popen( + cmd, stdout=subprocess.PIPE, env=env).communicate()[0] + return out + + try: + out = _minimal_ext_cmd(['git', 'rev-parse', 'HEAD']) + sha = out.strip().decode('ascii') + except OSError: + sha = 'unknown' + + return sha + + +def get_hash(): + if os.path.exists('.git'): + sha = get_git_hash()[:7] + elif os.path.exists(version_file): + try: + from mmdet.version import __version__ + sha = __version__.split('+')[-1] + except ImportError: + raise ImportError('Unable to get git version') + else: + sha = 'unknown' + + return sha + + +def write_version_py(): + content = """# GENERATED VERSION FILE +# TIME: {} + +__version__ = '{}' +short_version = '{}' +""" + sha = get_hash() + VERSION = SHORT_VERSION + '+' + sha + + with open(version_file, 'w') as f: + f.write(content.format(time.asctime(), VERSION, SHORT_VERSION)) + + +def get_version(): + with open(version_file, 'r') as f: + exec(compile(f.read(), version_file, 'exec')) + return locals()['__version__'] + + +def make_cuda_ext(name, module, sources): + + return CUDAExtension( + name='{}.{}'.format(module, name), + sources=[os.path.join(*module.split('.'), p) for p in sources], + extra_compile_args={ + 'cxx': [], + 'nvcc': [ + '-D__CUDA_NO_HALF_OPERATORS__', + '-D__CUDA_NO_HALF_CONVERSIONS__', + '-D__CUDA_NO_HALF2_OPERATORS__', + ] + }) + + +def make_cython_ext(name, module, sources): + extra_compile_args = None + if platform.system() != 'Windows': + extra_compile_args = { + 'cxx': ['-Wno-unused-function', '-Wno-write-strings'] + } + + extension = Extension( + '{}.{}'.format(module, name), + [os.path.join(*module.split('.'), p) for p in sources], + include_dirs=[np.get_include()], + language='c++', + extra_compile_args=extra_compile_args) + extension, = cythonize(extension) + return extension + + +if __name__ == '__main__': + write_version_py() + setup( + name='mmdet', + version=get_version(), + description='Open MMLab Detection Toolbox', + long_description=readme(), + keywords='computer vision, object detection', + url='https://github.com/open-mmlab/mmdetection', + packages=find_packages(exclude=('configs', 'tools', 'demo')), + package_data={'mmdet.ops': ['*/*.so']}, + classifiers=[ + 'Development Status :: 4 - Beta', + 'License :: OSI Approved :: Apache Software License', + 'Operating System :: OS Independent', + 'Programming Language :: Python :: 2', + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + 'Programming Language :: Python :: 3.4', + 'Programming Language :: Python :: 3.5', + 'Programming Language :: Python :: 3.6', + ], + license='Apache License 2.0', + setup_requires=['pytest-runner', 'cython', 'numpy'], + tests_require=['pytest'], + install_requires=[ + 'mmcv>=0.2.6', 'numpy', 'matplotlib', 'six', 'terminaltables', + 'pycocotools', 'torch>=1.1' + ], + ext_modules=[ + make_cython_ext( + name='soft_nms_cpu', + module='mmdet.ops.nms', + sources=['src/soft_nms_cpu.pyx']), + make_cuda_ext( + name='roi_align_cuda', + module='mmdet.ops.roi_align', + sources=['src/roi_align_cuda.cpp', 'src/roi_align_kernel.cu']), + make_cuda_ext( + name='roi_pool_cuda', + module='mmdet.ops.roi_pool', + sources=['src/roi_pool_cuda.cpp', 'src/roi_pool_kernel.cu']), + make_cuda_ext( + name='nms_cpu', + module='mmdet.ops.nms', + sources=['src/nms_cpu.cpp']), + make_cuda_ext( + name='nms_cuda', + module='mmdet.ops.nms', + sources=['src/nms_cuda.cpp', 'src/nms_kernel.cu']), + make_cuda_ext( + name='deform_conv_cuda', + module='mmdet.ops.dcn', + sources=[ + 'src/deform_conv_cuda.cpp', + 'src/deform_conv_cuda_kernel.cu' + ]), + make_cuda_ext( + name='deform_pool_cuda', + module='mmdet.ops.dcn', + sources=[ + 'src/deform_pool_cuda.cpp', + 'src/deform_pool_cuda_kernel.cu' + ]), + make_cuda_ext( + name='sigmoid_focal_loss_cuda', + module='mmdet.ops.sigmoid_focal_loss', + sources=[ + 'src/sigmoid_focal_loss.cpp', + 'src/sigmoid_focal_loss_cuda.cu' + ]), + make_cuda_ext( + name='masked_conv2d_cuda', + module='mmdet.ops.masked_conv', + sources=[ + 'src/masked_conv2d_cuda.cpp', 'src/masked_conv2d_kernel.cu' + ]), + ], + cmdclass={'build_ext': BuildExtension}, + zip_safe=False)