* Update INSTALL.md: libboost-all-dev is required to build dense_flow
* Fix dense_flow path error
* Support the HMDB51 dataset
Showing 13 changed files with 475 additions and 4 deletions.
@@ -0,0 +1,125 @@

```python
# model settings
model = dict(
    type='TSN2D',
    modality='Flow',
    in_channels=10,
    backbone=dict(
        type='BNInception',
        pretrained='open-mmlab://bninception_caffe',
        bn_eval=False,
        partial_bn=True),
    spatial_temporal_module=dict(
        type='SimpleSpatialModule',
        spatial_type='avg',
        spatial_size=7),
    segmental_consensus=dict(
        type='SimpleConsensus',
        consensus_type='avg'),
    cls_head=dict(
        type='ClsHead',
        with_avg_pool=False,
        temporal_feature_size=1,
        spatial_feature_size=1,
        dropout_ratio=0.7,
        in_channels=1024,
        num_classes=51))
train_cfg = None
test_cfg = None
# dataset settings
dataset_type = 'RawFramesDataset'
data_root = 'data/hmdb51/rawframes'
img_norm_cfg = dict(
    mean=[128], std=[1], to_rgb=False)
data = dict(
    videos_per_gpu=32,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file='data/hmdb51/hmdb51_train_split_1_rawframes.txt',
        img_prefix=data_root,
        img_norm_cfg=img_norm_cfg,
        num_segments=3,
        new_length=5,
        new_step=1,
        random_shift=True,
        modality='Flow',
        image_tmpl='flow_{}_{:05d}.jpg',
        img_scale=256,
        input_size=224,
        div_255=False,
        flip_ratio=0.5,
        resize_keep_ratio=True,
        oversample=None,
        random_crop=False,
        more_fix_crop=False,
        multiscale_crop=True,
        scales=[1, 0.875, 0.75, 0.66],
        max_distort=1,
        test_mode=False),
    val=dict(
        type=dataset_type,
        ann_file='data/hmdb51/hmdb51_val_split_1_rawframes.txt',
        img_prefix=data_root,
        img_norm_cfg=img_norm_cfg,
        num_segments=3,
        new_length=5,
        new_step=1,
        random_shift=False,
        modality='Flow',
        image_tmpl='flow_{}_{:05d}.jpg',
        img_scale=256,
        input_size=224,
        div_255=False,
        flip_ratio=0,
        resize_keep_ratio=True,
        oversample=None,
        random_crop=False,
        more_fix_crop=False,
        multiscale_crop=False,
        test_mode=False),
    test=dict(
        type=dataset_type,
        ann_file='data/hmdb51/hmdb51_val_split_1_rawframes.txt',
        img_prefix=data_root,
        img_norm_cfg=img_norm_cfg,
        num_segments=25,
        new_length=5,
        new_step=1,
        random_shift=False,
        modality='Flow',
        image_tmpl='flow_{}_{:05d}.jpg',
        img_scale=256,
        input_size=224,
        div_255=False,
        flip_ratio=0,
        resize_keep_ratio=True,
        oversample='ten_crop',
        random_crop=False,
        more_fix_crop=False,
        multiscale_crop=False,
        test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.005, momentum=0.9, weight_decay=0.0005)
optimizer_config = dict(grad_clip=dict(max_norm=20, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    step=[190, 300])
checkpoint_config = dict(interval=1)
# workflow = [('train', 5), ('val', 1)]
workflow = [('train', 1)]
# yapf:disable
log_config = dict(
    interval=20,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 340
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/tsn_2d_flow_bninception_seg_3_f1s1_b32_g8_lr_0.005'
load_from = None
resume_from = None
```
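One detail in the flow config above is worth spelling out: `in_channels=10` follows directly from `new_length=5`, because each sampled optical-flow "frame" is a pair of x/y displacement images (`flow_x_*.jpg` / `flow_y_*.jpg`, matching `image_tmpl`). A minimal sketch of that arithmetic (the variable names are ours, not part of the config schema):

```python
# Sketch: why the flow config sets in_channels=10.
# Each sampled flow "frame" contributes two grayscale images
# (flow_x_*.jpg and flow_y_*.jpg), so stacking new_length frames
# per segment yields new_length * 2 input channels.
new_length = 5                 # frames stacked per segment (from the config)
channels_per_flow_frame = 2    # one x-displacement + one y-displacement image
in_channels = new_length * channels_per_flow_frame
assert in_channels == 10       # matches in_channels=10 in the model dict above
```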
@@ -0,0 +1,125 @@

```python
# model settings
model = dict(
    type='TSN2D',
    backbone=dict(
        type='BNInception',
        pretrained='open-mmlab://bninception_caffe',
        bn_eval=False,
        partial_bn=True),
    spatial_temporal_module=dict(
        type='SimpleSpatialModule',
        spatial_type='avg',
        spatial_size=7),
    segmental_consensus=dict(
        type='SimpleConsensus',
        consensus_type='avg'),
    cls_head=dict(
        type='ClsHead',
        with_avg_pool=False,
        temporal_feature_size=1,
        spatial_feature_size=1,
        dropout_ratio=0.8,
        in_channels=1024,
        init_std=0.001,
        num_classes=51))
train_cfg = None
test_cfg = None
# dataset settings
dataset_type = 'RawFramesDataset'
data_root = 'data/hmdb51/rawframes'
img_norm_cfg = dict(
    mean=[104, 117, 128], std=[1, 1, 1], to_rgb=False)

data = dict(
    videos_per_gpu=32,
    workers_per_gpu=2,
    train=dict(
        type=dataset_type,
        ann_file='data/hmdb51/hmdb51_train_split_1_rawframes.txt',
        img_prefix=data_root,
        img_norm_cfg=img_norm_cfg,
        num_segments=3,
        new_length=1,
        new_step=1,
        random_shift=True,
        modality='RGB',
        image_tmpl='img_{:05d}.jpg',
        img_scale=256,
        input_size=224,
        div_255=False,
        flip_ratio=0.5,
        resize_keep_ratio=True,
        oversample=None,
        random_crop=False,
        more_fix_crop=False,
        multiscale_crop=True,
        scales=[1, 0.875, 0.75, 0.66],
        max_distort=1,
        test_mode=False),
    val=dict(
        type=dataset_type,
        ann_file='data/hmdb51/hmdb51_val_split_1_rawframes.txt',
        img_prefix=data_root,
        img_norm_cfg=img_norm_cfg,
        num_segments=3,
        new_length=1,
        new_step=1,
        random_shift=False,
        modality='RGB',
        image_tmpl='img_{:05d}.jpg',
        img_scale=256,
        input_size=224,
        div_255=False,
        flip_ratio=0,
        resize_keep_ratio=True,
        oversample=None,
        random_crop=False,
        more_fix_crop=False,
        multiscale_crop=False,
        test_mode=False),
    test=dict(
        type=dataset_type,
        ann_file='data/hmdb51/hmdb51_val_split_1_rawframes.txt',
        img_prefix=data_root,
        img_norm_cfg=img_norm_cfg,
        num_segments=25,
        new_length=1,
        new_step=1,
        random_shift=False,
        modality='RGB',
        image_tmpl='img_{:05d}.jpg',
        img_scale=256,
        input_size=224,
        div_255=False,
        flip_ratio=0,
        resize_keep_ratio=True,
        oversample='ten_crop',
        random_crop=False,
        more_fix_crop=False,
        multiscale_crop=False,
        test_mode=True))
# optimizer
optimizer = dict(type='SGD', lr=0.001, momentum=0.9, weight_decay=0.0005)
optimizer_config = dict(grad_clip=dict(max_norm=40, norm_type=2))
# learning policy
lr_config = dict(
    policy='step',
    step=[30, 60])
checkpoint_config = dict(interval=1)
# workflow = [('train', 5), ('val', 1)]
workflow = [('train', 1)]
# yapf:disable
log_config = dict(
    interval=20,
    hooks=[
        dict(type='TextLoggerHook'),
        # dict(type='TensorboardLoggerHook')
    ])
# yapf:enable
# runtime settings
total_epochs = 80
dist_params = dict(backend='nccl')
log_level = 'INFO'
work_dir = './work_dirs/tsn_2d_rgb_bninception_seg_3_f1s1_b32_g8'
load_from = None
resume_from = None
```
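Before launching a run, either config can be loaded and inspected programmatically. A minimal sketch using `mmcv`'s `Config` loader; the file path below is hypothetical and should point at wherever the config is actually saved:

```python
# Minimal sketch: load and inspect an mmaction-style config with mmcv.
from mmcv import Config

# Hypothetical path; adjust to the real location of the saved config.
cfg = Config.fromfile('configs/hmdb51_tsn_rgb.py')
print(cfg.model['type'])           # 'TSN2D'
print(cfg.data['videos_per_gpu'])  # 32
print(cfg.total_epochs)            # 80
```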
@@ -0,0 +1,80 @@
## Preparing HMDB51

For more details, please refer to the official [website](http://serre-lab.clps.brown.edu/resource/hmdb-a-large-human-motion-database/). We provide scripts with documentation. Before we start, please make sure that the current directory is `$MMACTION/data_tools/hmdb51/`.

### Prepare annotations
First of all, run the following script to prepare the annotations.
```shell
bash download_annotations.sh
```

### Prepare videos
Then, use the following script to prepare the videos.
```shell
bash download_videos.sh
```

### Extract frames
Now it is time to extract frames from the videos.
Before extraction, please refer to `DATASET.md` for installing [dense_flow](https://github.com/yjxiong/dense_flow).
If you have an SSD available, we recommend extracting frames there for better I/O performance. The extracted frames (RGB + Flow) take up about 24 GB.
```shell
# execute these two lines (assuming the SSD is mounted at "/mnt/SSD/")
mkdir /mnt/SSD/hmdb51_extracted/
ln -s /mnt/SSD/hmdb51_extracted/ ../data/hmdb51/rawframes
```

If you did not install dense_flow during setup, or only want to work with RGB frames (extracting optical flow is both time-consuming and space-hungry), run the following script to extract **RGB-only** frames.
```shell
bash extract_rgb_frames.sh
```

If both RGB and optical flow frames are required, run the following script instead.
```shell
bash extract_frames.sh
```

### Generate filelist
Run the following script to generate file lists in both the rawframes and videos formats; a sample of the rawframes format is shown after this block.
```shell
bash generate_filelist.sh
```
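The generated rawframes lists are what the `ann_file` entries in the configs above point to. For `RawFramesDataset`, each line is expected to hold a relative frame directory, its total frame count, and a numeric class label. The frame counts and label indices below are illustrative placeholders, not real values:

```
brush_hair/April_09_brush_hair_u_nm_np1_ba_goo_0 240 0
wave/20060723sfjffbartsinger_wave_f_cm_np1_ba_med_0 168 50
```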
### Folder structure
In the context of the whole project (for hmdb51 only), the folder structure will look like:
```
mmaction
├── mmaction
├── tools
├── configs
├── data
│   ├── hmdb51
│   │   ├── hmdb51_{train,val}_split_{1,2,3}_rawframes.txt
│   │   ├── hmdb51_{train,val}_split_{1,2,3}_videos.txt
│   │   ├── annotations
│   │   ├── videos
│   │   │   ├── brush_hair
│   │   │   │   ├── April_09_brush_hair_u_nm_np1_ba_goo_0.avi
│   │   │   ├── wave
│   │   │   │   ├── 20060723sfjffbartsinger_wave_f_cm_np1_ba_med_0.avi
│   │   ├── rawframes
│   │   │   ├── brush_hair
│   │   │   │   ├── April_09_brush_hair_u_nm_np1_ba_goo_0
│   │   │   │   │   ├── img_00001.jpg
│   │   │   │   │   ├── img_00002.jpg
│   │   │   │   │   ├── ...
│   │   │   │   │   ├── flow_x_00001.jpg
│   │   │   │   │   ├── flow_x_00002.jpg
│   │   │   │   │   ├── ...
│   │   │   │   │   ├── flow_y_00001.jpg
│   │   │   │   │   ├── flow_y_00002.jpg
│   │   │   ├── ...
│   │   │   ├── wave
│   │   │   │   ├── 20060723sfjffbartsinger_wave_f_cm_np1_ba_med_0
│   │   │   │   ├── ...
│   │   │   │   ├── winKen_wave_u_cm_np1_ri_bad_1
```
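After extraction, it is worth confirming that frames actually landed under `data/hmdb51/rawframes`. Below is a small, hypothetical sanity check (ours, not part of the repository) that counts extracted clip directories per the layout above:

```python
# Hypothetical sanity check (not part of mmaction): count extracted
# clip directories under data/hmdb51/rawframes, per the layout above.
import os

root = 'data/hmdb51/rawframes'
n_clips = 0
for cls in sorted(os.listdir(root)):
    cls_dir = os.path.join(root, cls)
    if os.path.isdir(cls_dir):
        n_clips += len(os.listdir(cls_dir))
print(f'{n_clips} clip directories under {root}')  # HMDB51 has 6766 clips in total
```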
For training and evaluating on HMDB51, please refer to [GETTING_STARTED.md](https://github.com/open-mmlab/mmaction/blob/master/GETTING_STARTED.md).