Hi, is a small dataset like this simply not well suited to DETR-series models? I tried to reproduce the results on ARS-DETR and the mAP also comes out close to 0, yet a similar set of configuration parameters in the official mmrotate repository reaches roughly AP50 = 0.90.
2025-01-04 16:47:44,565 - mmrotate - INFO - Environment info:
sys.platform: linux
Python: 3.8.20 | packaged by conda-forge | (default, Sep 30 2024, 17:52:49) [GCC 13.3.0]
CUDA available: True
GPU 0: NVIDIA GeForce RTX 3090
CUDA_HOME: /usr/local/cuda
NVCC: Cuda compilation tools, release 11.3, V11.3.58
GCC: gcc (Ubuntu 7.5.0-6ubuntu2) 7.5.0
PyTorch: 1.12.1+cu113
PyTorch compiling details: PyTorch built with:
TorchVision: 0.13.1+cu113
OpenCV: 4.10.0
MMCV: 1.7.2
MMCV Compiler: GCC 9.3
MMCV CUDA Compiler: 11.3
MMRotate: 0.1.0+f6da5ee
2025-01-04 16:47:44,879 - mmrotate - INFO - Distributed training: False
2025-01-04 16:47:45,188 - mmrotate - INFO - Config:
dataset_type = 'SARDataset'
data_root = '/data/seekyou/ssdd/'
img_norm_cfg = dict(
mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_rgb=False)
train_pipeline = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='RResize', img_scale=(608, 608)),
dict(
type='RRandomFlip',
flip_ratio=[0.25, 0.25, 0.25],
direction=['horizontal', 'vertical', 'diagonal'],
version='oc'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=False),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
]
test_pipeline = [
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(608, 608),
flip=False,
transforms=[
dict(type='RResize'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=False),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img'])
])
]
data = dict(
samples_per_gpu=4,
workers_per_gpu=2,
train=dict(
type='SARDataset',
ann_file='/data/seekyou/ssdd/train/labelTxt/',
img_prefix='/data/seekyou/ssdd/train/images/',
pipeline=[
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True),
dict(type='RResize', img_scale=(608, 608)),
dict(
type='RRandomFlip',
flip_ratio=[0.25, 0.25, 0.25],
direction=['horizontal', 'vertical', 'diagonal'],
version='oc'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=False),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img', 'gt_bboxes', 'gt_labels'])
],
filter_empty_gt=False,
version='oc'),
val=dict(
type='SARDataset',
ann_file='/data/seekyou/ssdd/test/inshore/labelTxt/',
img_prefix='/data/seekyou/ssdd/test/inshore/images/',
pipeline=[
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(608, 608),
flip=False,
transforms=[
dict(type='RResize'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=False),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img'])
])
],
version='oc'),
test=dict(
type='SARDataset',
ann_file='/data/seekyou/ssdd/test/all/labelTxt/',
img_prefix='/data/seekyou/ssdd/test/all/images/',
pipeline=[
dict(type='LoadImageFromFile'),
dict(
type='MultiScaleFlipAug',
img_scale=(608, 608),
flip=False,
transforms=[
dict(type='RResize'),
dict(
type='Normalize',
mean=[123.675, 116.28, 103.53],
std=[58.395, 57.12, 57.375],
to_rgb=False),
dict(type='Pad', size_divisor=32),
dict(type='DefaultFormatBundle'),
dict(type='Collect', keys=['img'])
])
],
version='oc'))
evaluation = dict(interval=1, metric='mAP', save_best='auto')
optimizer = dict(
type='AdamW',
lr=0.0001,
weight_decay=1e-05,
betas=(0.9, 0.999),
paramwise_cfg=dict(
custom_keys=dict(
backbone=dict(lr_mult=0.1),
sampling_offsets=dict(lr_mult=0.1),
reference_points=dict(lr_mult=0.1))))
optimizer_config = dict(grad_clip=dict(max_norm=35, norm_type=2))
lr_config = dict(
policy='step',
warmup='linear',
warmup_iters=500,
warmup_ratio=0.3333333333333333,
step=[120])
runner = dict(type='EpochBasedRunner', max_epochs=150)
checkpoint_config = dict(interval=6)
log_config = dict(interval=20, hooks=[dict(type='TextLoggerHook')])
dist_params = dict(backend='nccl')
log_level = 'INFO'
load_from = None
resume_from = None
workflow = [('train', 1)]
angle_version = 'oc'
model = dict(
type='RotatedDeformableDETR',
backbone=dict(
type='ResNet',
depth=50,
num_stages=4,
out_indices=(1, 2, 3),
frozen_stages=1,
norm_cfg=dict(type='BN', requires_grad=False),
norm_eval=True,
style='pytorch',
init_cfg=dict(type='Pretrained', checkpoint='torchvision://resnet50')),
neck=dict(
type='ChannelMapper',
in_channels=[512, 1024, 2048],
kernel_size=1,
out_channels=256,
act_cfg=None,
norm_cfg=dict(type='GN', num_groups=32),
num_outs=4),
bbox_head=dict(
type='RotatedDeformableDETRHead',
num_query=250,
num_classes=1,
in_channels=2048,
sync_cls_avg_factor=True,
as_two_stage=True,
transformer=dict(
type='RotatedDeformableDetrTransformer',
two_stage_num_proposals=250,
encoder=dict(
type='DetrTransformerEncoder',
num_layers=6,
transformerlayers=dict(
type='BaseTransformerLayer',
attn_cfgs=dict(
type='MultiScaleDeformableAttention', embed_dims=256),
feedforward_channels=1024,
ffn_dropout=0.1,
operation_order=('self_attn', 'norm', 'ffn', 'norm'))),
decoder=dict(
type='RotatedDeformableDetrTransformerDecoder',
num_layers=6,
return_intermediate=True,
transformerlayers=dict(
type='DetrTransformerDecoderLayer',
attn_cfgs=[
dict(
type='MultiheadAttention',
embed_dims=256,
num_heads=8,
dropout=0.1),
dict(
type='MultiScaleDeformableAttention',
embed_dims=256)
],
feedforward_channels=1024,
ffn_dropout=0.1,
operation_order=('self_attn', 'norm', 'cross_attn', 'norm',
'ffn', 'norm')))),
positional_encoding=dict(
type='SinePositionalEncoding',
num_feats=128,
normalize=True,
offset=-0.5),
bbox_coder=dict(
type='DeltaXYWHAOBBoxCoder',
angle_range='oc',
norm_factor=None,
edge_swap=True,
proj_xy=True,
target_means=(0.0, 0.0, 0.0, 0.0, 0.0),
target_stds=(0.1, 0.1, 0.2, 0.2, 0.1)),
loss_cls=dict(
type='FocalLoss',
use_sigmoid=True,
gamma=2.0,
alpha=0.25,
loss_weight=2.0),
loss_bbox=dict(type='L1Loss', loss_weight=2.0),
reg_decoded_bbox=True,
loss_iou=dict(type='RotatedIoULoss', loss_weight=5.0),
with_box_refine=True),
train_cfg=dict(
assigner=dict(
type='Rotated_HungarianAssigner',
cls_cost=dict(type='FocalLossCost', weight=2.0),
reg_cost=dict(type='RBBoxL1Cost', weight=2.0, box_format='xywha'),
iou_cost=dict(type='RotatedIoUCost', iou_mode='iou', weight=5.0))),
test_cfg=dict())
find_unused_parameters = True
work_dir = 'work_dirs/new_refine/'
auto_resume = False
gpu_ids = range(0, 1)
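For reference, below is a minimal sanity-check sketch I would run against this config before suspecting the model itself, since an AP near 0 is also consistent with empty or mis-parsed ground truth. It assumes mmrotate 0.x with mmcv 1.x (so `mmcv.Config.fromfile` and `mmrotate.datasets.build_dataset` are available) and uses a placeholder config path, not the actual file name from this run:

import mmcv
from mmrotate.datasets import build_dataset

# Placeholder path; substitute the config file that produced the dump above.
cfg = mmcv.Config.fromfile('configs/rotated_deformable_detr_ssdd.py')

# Build the training split exactly as the runner would.
dataset = build_dataset(cfg.data.train)
print(f'train images: {len(dataset)}')

# Count images whose annotations parsed to zero ground-truth boxes.
empty = sum(1 for i in range(len(dataset))
            if dataset.get_ann_info(i)['bboxes'].shape[0] == 0)
print(f'images with no gt boxes: {empty}')

# Inspect a few boxes to confirm the (cx, cy, w, h, angle) layout and the 'oc' angle range.
print(dataset.get_ann_info(0)['bboxes'][:3])

If most images report zero boxes, or the angles fall outside what the 'oc' version expects, the problem is in the annotation parsing rather than in the detector.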