# dataset_definitions.yml (forked from openvinotoolkit/open_model_zoo)
datasets:
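# Each entry below follows the same pattern: `name` is the identifier that
# model configs refer to, `annotation_conversion` selects a converter and its
# inputs, `annotation` / `dataset_meta` name the cached files produced by the
# conversion, and `data_source` points at the image (or data) directory.
# Optional `preprocessing`, `postprocessing`, `metrics` and `reader` sections
# are picked up whenever the dataset is used. This file is normally passed to
# the accuracy_check tool through its --definitions (-d) option; the summary
# above is a reading of the entries in this file, not an authoritative schema.
#
# A model config would then reference a definition by name, roughly:
#
#   models:
#     - name: my-model            # hypothetical model name
#       launchers:
#         - framework: openvino
#           model: my-model.xml   # hypothetical path
#           adapter: classification
#       datasets:
#         - name: imagenet_1000_classes
#
# (Sketch of Accuracy Checker usage; the exact launcher fields depend on the
# model and are assumptions here.)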
- name: ms_coco_mask_rcnn
annotation_conversion:
converter: mscoco_mask_rcnn
annotation_file: instances_val2017.json
has_background: True
sort_annotations: True
annotation: mscoco_mask_rcnn.pickle
dataset_meta: mscoco_mask_rcnn.json
data_source: val2017
- name: ms_coco_mask_rcnn_short_80_classes
annotation_conversion:
converter: mscoco_mask_rcnn
annotation_file: instances_val2017_short.json
has_background: True
sort_annotations: True
annotation: mscoco_mask_rcnn_short_80.pickle
dataset_meta: mscoco_mask_rcnn_short_80.json
data_source: val2017
- name: ms_coco_mask_rcnn_short_80_classes_without_background
annotation_conversion:
converter: mscoco_mask_rcnn
annotation_file: instances_val2017.json
has_background: False
sort_annotations: True
annotation: mscoco_mask_rcnn_short_80_without_bkgr.pickle
dataset_meta: mscoco_mask_rcnn_short_80_without_bkgr.json
data_source: val2017
- name: ms_coco_mask_rcnn_short_91_classes
annotation_conversion:
converter: mscoco_mask_rcnn
annotation_file: instances_val2017_short.json
has_background: True
sort_annotations: True
use_full_label_map: True
annotation: mscoco_mask_rcnn_short_91.pickle
dataset_meta: mscoco_mask_rcnn_short_91.json
data_source: val2017
preprocessing:
- type: resize
aspect_ratio_scale: fit_to_window
dst_height: 800
dst_width: 1365
- type: padding
dst_height: 800
dst_width: 1365
pad_type: right_bottom
postprocessing:
- type: faster_rcnn_postprocessing_resize
dst_height: 800
dst_width: 1365
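# Note (added comment): the resize with aspect_ratio_scale: fit_to_window plus
# right_bottom padding above produces a fixed 800x1365 network input without
# distorting the image; faster_rcnn_postprocessing_resize then maps the
# predicted boxes and masks back to the original image size, so the COCO
# metrics are evaluated at source resolution. This is an interpretation of
# the listed steps rather than part of the original file.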
- name: ms_coco_detection_91_classes
annotation_conversion:
converter: mscoco_detection
annotation_file: instances_val2017.json
has_background: True
sort_annotations: True
use_full_label_map: True
annotation: mscoco_det_91.pickle
dataset_meta: mscoco_det_91.json
data_source: val2017
preprocessing:
- type: resize
aspect_ratio_scale: fit_to_window
dst_height: 600
dst_width: 1024
- type: padding
dst_height: 600
dst_width: 1024
pad_type: right_bottom
postprocessing:
- type: faster_rcnn_postprocessing_resize
dst_height: 600
dst_width: 1024
- name: ms_coco_detection_80_class_without_background
data_source: val2017
annotation_conversion:
converter: mscoco_detection
annotation_file: instances_val2017.json
has_background: False
sort_annotations: True
use_full_label_map: False
annotation: mscoco_det_80.pickle
dataset_meta: mscoco_det_80.json
- name: ms_coco_detection_80_class_with_background
data_source: val2017
annotation_conversion:
converter: mscoco_detection
annotation_file: instances_val2017.json
has_background: True
sort_annotations: True
use_full_label_map: False
annotation: mscoco_det_80_bkgr.pickle
dataset_meta: mscoco_det_80_bkgr.json
- name: ms_coco_detection_90_class_without_background
data_source: MSCOCO/val2017
annotation_conversion:
converter: mscoco_detection
annotation_file: MSCOCO/annotations/instances_val2017.json
has_background: False
sort_annotations: True
use_full_label_map: True
annotation: mscoco_det_90.pickle
dataset_meta: mscoco_det_90.json
- name: ms_coco_keypoints
data_source: val2017
annotation_conversion:
converter: mscoco_keypoints
annotation_file: person_keypoints_val2017.json
sort_key: image_size
annotation: mscoco_keypoints.pickle
dataset_meta: mscoco_keypoints.json
metrics:
- name: AP
type: coco_precision
max_detections: 20
- name: ms_coco_val2017_keypoints
data_source: val2017
annotation_conversion:
converter: mscoco_keypoints
annotation_file: person_keypoints_val2017.json
remove_empty_images: True
sort_annotations: True
sort_key: image_size
images_dir: val2017
annotation: mscoco_val2017_keypoints.pickle
dataset_meta: mscoco_val2017_keypoints.json
metrics:
- name: AP
type: coco_orig_keypoints_precision
- name: ms_coco_val2017_keypoints_5k_images
data_source: val2017
annotation_conversion:
converter: mscoco_keypoints
annotation_file: person_keypoints_val2017.json
remove_empty_images: False
sort_annotations: True
sort_key: image_size
images_dir: val2017
annotation: mscoco_val2017_keypoints_5k_images.pickle
dataset_meta: mscoco_val2017_keypoints_5k_images.json
metrics:
- name: AP
type: coco_orig_keypoints_precision
- name: imagenet_1000_classes
annotation_conversion:
converter: imagenet
annotation_file: val.txt
annotation: imagenet1000.pickle
data_source: ILSVRC2012_img_val
metrics:
- name: accuracy@top1
type: accuracy
top_k: 1
- name: accuracy@top5
type: accuracy
top_k: 5
- name: imagenet_1000_classes_2015
annotation_conversion:
converter: imagenet
annotation_file: val15.txt
annotation: imagenet1000_2015.pickle
data_source: ILSVRC2012_img_val
metrics:
- name: accuracy@top1
type: accuracy
top_k: 1
- name: accuracy@top5
type: accuracy
top_k: 5
- name: imagenet_1001_classes
annotation_conversion:
converter: imagenet
annotation_file: val.txt
has_background: True
annotation: imagenet1001.pickle
data_source: ILSVRC2012_img_val
metrics:
- name: accuracy@top1
type: accuracy
top_k: 1
- name: accuracy@top5
type: accuracy
top_k: 5
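# has_background: True adds a background class on top of the 1000 ImageNet
# labels, which is what the "1001_classes" name refers to; models trained
# with a 1001-way output should use this variant. (Inferred from the
# converter option and the dataset names, added here as a comment.)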
- name: VOC2012
annotation_conversion:
converter: voc_detection
annotations_dir: VOCdevkit/VOC2012/Annotations
images_dir: VOCdevkit/VOC2012/JPEGImages
imageset_file: VOCdevkit/VOC2012/ImageSets/Main/val.txt
data_source: VOCdevkit/VOC2012/JPEGImages
annotation: voc12.pickle
dataset_meta: voc12.json
postprocessing:
- type: resize_prediction_boxes
metrics:
- type: map
integral: 11point
ignore_difficult: True
presenter: print_scalar
- name: VOC2012_without_background
annotation_conversion:
converter: voc_detection
annotations_dir: VOCdevkit/VOC2012/Annotations
images_dir: VOCdevkit/VOC2012/JPEGImages
imageset_file: VOCdevkit/VOC2012/ImageSets/Main/val.txt
has_background: False
data_source: VOCdevkit/VOC2012/JPEGImages
annotation: voc12_without_background.pickle
dataset_meta: voc12_without_background.json
postprocessing:
- type: resize_prediction_boxes
metrics:
- type: map
integral: 11point
ignore_difficult: True
presenter: print_scalar
- name: VOC2012_Segmentation
annotation_conversion:
converter: voc_segmentation
imageset_file: VOCdevkit/VOC2012/ImageSets/Segmentation/val.txt
images_dir: VOCdevkit/VOC2012/JPEGImages/
mask_dir: VOCdevkit/VOC2012/SegmentationClass/
data_source: VOCdevkit/VOC2012/JPEGImages/
segmentation_masks_source: VOCdevkit/VOC2012/SegmentationClass/
annotation: voc2012_segmentation.pickle
dataset_meta: voc2012_segmentation.json
- name: mapillary_20
annotation_conversion:
converter: mapillary_20
data_dir: Mapillary_20
annotation: mapillary_20.pickle
dataset_meta: mapillary_20.json
- name: mapillary_vistas
data_source: mapillary_vistas/validation/images
segmentation_masks_source: mapillary_vistas/validation/labels
annotation_conversion:
converter: mapillary_vistas
data_dir: mapillary_vistas/validation
annotation: mapillary_vistas.pickle
dataset_meta: mapillary_vistas.json
- name: wider
data_source: WIDER_val/images
annotation_conversion:
converter: wider
annotation_file: wider_face_split/wider_face_val_bbx_gt.txt
annotation: wider.pickle
dataset_meta: wider.json
- name: wider_without_bkgr
data_source: WIDER_val/images
annotation_conversion:
converter: wider
annotation_file: wider_face_split/wider_face_val_bbx_gt.txt
label_start: 0
annotation: wider_0.pickle
dataset_meta: wider_0.json
- name: facial_landmarks_35
data_source: VOCdevkit/VOC2012/JPEGImages
annotation_conversion:
converter: cvat_facial_landmarks
annotation_file: 3632_OMZ_task3_facial_landmarks_35_adas.xml
annotation: facial_landmarks_35.pickle
preprocessing:
- type: resize
size: 60
postprocessing:
- type: normalize_landmarks_points
- name: emotions_recognition
data_source: VOCdevkit/VOC2012/JPEGImages
annotation_conversion:
converter: cvat_attributes_recognition
annotation_file: 3631_OMZ_task2_emotions_recognition.xml
label: face
annotation: emotions_recognition.pickle
dataset_meta: emotions_recognition.json
preprocessing:
- type: extend_around_rect
augmentation_param: 0.3
- type: crop_rect
- type: resize
size: 64
- name: age_gender
data_source: ILSVRC2012_img_val
annotation_conversion:
converter: cvat_age_gender
annotation_file: 3630_OMZ_task1_age_gender.xml
annotation: age_gender.pickle
dataset_meta: age_gender.json
- name: vehicle_attributes
data_source: val2017
annotation_conversion:
converter: cvat_attributes_recognition
annotation_file: 3634_OMZ_task8_vehicle_attributes_recognition_barrier_0039.xml
label: vehicle
annotation: vehicle_attributes.pickle
dataset_meta: vehicle_attributes.json
- name: vehicle_attributes_0042
data_source: val2017
annotation_conversion:
converter: cvat_attributes_recognition
annotation_file: 3634_OMZ_task8_vehicle_attributes_recognition_barrier_0039.xml
label: vehicle
annotation: vehicle_attributes.pickle
dataset_meta: vehicle_attributes.json
- name: person_8_attributes
data_source: ILSVRC2012_img_val
annotation_conversion:
converter: cvat_multilabel_binary_attributes_recognition
annotation_file: 3640_OMZ_task6_person_attributes_recognition_crossroad_0230.xml
label: person
annotation: person_8_attributes.pickle
dataset_meta: person_8_attributes.json
- name: vehicle_license_plate_detection
data_source: ILSVRC2012_img_val
annotation_conversion:
converter: cvat_object_detection
annotation_file: 3638_OMZ_task13_vehicle_license_plate_detection_barrier_0106.xml
has_background: True
annotation: vlpd.pickle
dataset_meta: vlpd.json
- name: action_detection_dataset_3_classes
data_source: WIDER_val/images/44--Aerobics
annotation_conversion:
converter: cvat_person_detection_action_recognition
use_case: common_3_actions
annotation_file: 3766_OMZ_task14_person-detection-raisinghand-recognition-0001.xml
annotation: action_detection_3classes.pickle
dataset_meta: action_detection_3classes.json
- name: action_detection_dataset_6_classes
data_source: WIDER_val/images/44--Aerobics
annotation_conversion:
converter: cvat_person_detection_action_recognition
use_case: common_6_actions
annotation_file: 3766_OMZ_task14_person-detection-raisinghand-recognition-0001.xml
annotation: action_detection_6classes.pickle
dataset_meta: action_detection_6classes.json
- name: action_detection_dataset_teacher
data_source: WIDER_val/images/44--Aerobics
annotation_conversion:
converter: cvat_person_detection_action_recognition
use_case: teacher
annotation_file: 3766_OMZ_task14_person-detection-raisinghand-recognition-0001.xml
annotation: action_detection_teacher.pickle
dataset_meta: action_detection_teacher.json
- name: action_detection_dataset_raising_hand
data_source: WIDER_val/images/44--Aerobics
annotation_conversion:
converter: cvat_person_detection_action_recognition
use_case: raising_hand
annotation_file: 3766_OMZ_task14_person-detection-raisinghand-recognition-0001.xml
annotation: action_detection_raising_hand.pickle
dataset_meta: action_detection_raising_hand.json
- name: person_detection
data_source: val2017
annotation_conversion:
converter: mscoco_detection
annotation_file: person_keypoints_val2017.json
has_background: True
sort_annotations: True
use_full_label_map: True
annotation: mscoco_person_detection.pickle
dataset_meta: mscoco_person_detection.json
- name: mscoco_person_detection
data_source: val2017
annotation_conversion:
converter: mscoco_detection
annotation_file: person_keypoints_val2017.json
has_background: True
sort_annotations: True
use_full_label_map: True
annotation: mscoco_person_detection.pickle
dataset_meta: mscoco_person_detection.json
- name: crossroad_dataset_1016
data_source: val2017
annotation_conversion:
converter: cvat_object_detection
annotation_file: 3637_OMZ_task12_person_vehicle_bike_detection_crossroad_0078.xml
labels_file: person-vehicle-bike-detection-crossroad-1016-labels.json
has_background: True
annotation: crossroad-1016.pickle
dataset_meta: crossroad-1016.json
- name: crossroad_dataset_0078
data_source: val2017
annotation_conversion:
converter: cvat_object_detection
annotation_file: 3637_OMZ_task12_person_vehicle_bike_detection_crossroad_0078.xml
labels_file: person-vehicle-bike-detection-crossroad-0078-labels.json
has_background: True
annotation: crossroad-0078.pickle
dataset_meta: crossroad-0078.json
- name: crossroad_extra_untagged
data_source: crossroad_extra_untagged/val_data
annotation_conversion:
converter: mscoco_detection
annotation_file: crossroad_extra_untagged/val_coco_no_bg.json
has_background: False
sort_annotations: True
use_full_label_map: False
annotation: crossroad_extra_untagged.pickle
dataset_meta: crossroad_extra_untagged.json
- name: crossroad_extra_untagged_person
data_source: crossroad_extra_untagged/val_data
annotation_conversion:
converter: mscoco_detection
annotation_file: crossroad_extra_untagged/val_coco_no_bg_person.json
has_background: False
sort_annotations: True
use_full_label_map: False
annotation: crossroad_extra_untagged_person.pickle
dataset_meta: crossroad_extra_untagged_person.json
- name: crossroad_extra_untagged_person_hb
data_source: crossroad_extra_untagged/val_data
annotation_conversion:
converter: mscoco_detection
annotation_file: crossroad_extra_untagged/val_coco_no_bg_person.json
has_background: True
sort_annotations: True
use_full_label_map: False
annotation: crossroad_extra_untagged_person_hb.pickle
dataset_meta: crossroad_extra_untagged_person_hb.json
- name: crossroad_extra_untagged_vehicle
data_source: crossroad_extra_untagged/val_data
annotation_conversion:
converter: mscoco_detection
annotation_file: crossroad_extra_untagged/val_coco_no_bg_vehicle.json
has_background: False
sort_annotations: True
use_full_label_map: False
annotation: crossroad_extra_untagged_vehicle.pickle
dataset_meta: crossroad_extra_untagged_vehicle.json
- name: crossroad_extra_untagged_vehicle_labels_from_1
data_source: crossroad_extra_untagged/val_data
annotation_conversion:
converter: mscoco_detection
annotation_file: crossroad_extra_untagged/val_coco_no_bg_vehicle.json
has_background: True
annotation: crossroad_extra_untagged_vehicle_labels_from_1.pickle
dataset_meta: crossroad_extra_untagged_vehicle_labels_from_1.json
- name: pedestrian_and_vehicle_dataset
data_source: val2017
annotation_conversion:
converter: cvat_object_detection
annotation_file: 3636_OMZ_task11_pedestrian_and_vehicle_detector_adas_0001.xml
labels_file: pedestrian-and-vehicle-labels.json
has_background: True
annotation: pedestrian_and_vehicle.pickle
dataset_meta: pedestrian_and_vehicle.json
- name: pedestrian_detection_dataset
data_source: val2017
annotation_conversion:
converter: cvat_object_detection
annotation_file: 3636_OMZ_task11_pedestrian_and_vehicle_detector_adas_0001.xml
labels_file: pedestrian-detection-labels.json
has_background: True
annotation: pedestrian_detection.pickle
dataset_meta: pedestrian_detection.json
- name: vehicle_detection_dataset
data_source: val2017
annotation_conversion:
converter: cvat_object_detection
annotation_file: 3636_OMZ_task11_pedestrian_and_vehicle_detector_adas_0001.xml
labels_file: vehicle-detection-labels.json
has_background: True
annotation: vehicle_detection.pickle
dataset_meta: vehicle_detection.json
- name: barrier_vehicle_detection_dataset_index_class_2
data_source: val2017
annotation_conversion:
converter: cvat_object_detection
annotation_file: 3636_OMZ_task11_pedestrian_and_vehicle_detector_adas_0001.xml
labels_file: vehicle-detection-labels.json
has_background: True
annotation: vehicle_detection.pickle
dataset_meta: vehicle_detection.json
- name: synthetic_chinese_license_plates
data_source: Synthetic_Chinese_License_Plates
annotation_conversion:
converter: lpr_txt
annotation_file: Synthetic_Chinese_License_Plates/annotation
decoding_dictionary_file: dict
annotation: lpr.pickle
dataset_meta: lpr.json
preprocessing:
- type: resize
dst_width: 94
dst_height: 24
- name: image_retrieval
data_source: textile_crops
annotation_conversion:
converter: image_retrieval
data_dir: textile_crops
gallery_annotation_file: textile_crops/gallery/gallery.txt
queries_annotation_file: textile_crops/queries/quieries.txt
annotation: textile.pickle
dataset_meta: textile.json
preprocessing:
- type: resize
size: 224
- name: lfw
data_source: LFW/lfw
annotation_conversion:
converter: lfw
pairs_file: LFW/annotation/pairs.txt
landmarks_file: LFW/annotation/lfw_landmark.txt
annotation: lfw.pickle
metrics:
- type: pairwise_accuracy_subsets
subset_number: 2
- name: lfw_mtcnn_align
# converted from the LFW public dataset using the script
# https://github.com/davidsandberg/facenet/blob/master/src/align/align_dataset_mtcnn.py
data_source: lfw_mtcnnpy_160
annotation_conversion:
converter: lfw
pairs_file: LFW/annotation/pairs.txt
extension: png
annotation: lfw_mtcnn_align.pickle
- name: ICDAR2015
data_source: ICDAR15_DET/ch4_test_images
annotation_conversion:
converter: icdar_detection
data_dir: ICDAR15_DET/gt
annotation: icdar15_detection.pickle
- name: ICDAR2015_recognition
data_source: ic15_1811
annotation_conversion:
converter: lmdb_text_recognition_database
lower_case: true
data_dir: ic15_1811
annotation: icdar15_recognition.pickle
reader:
type: lmdb_reader
- name: ICDAR03_recognition
data_source: ic03_867
annotation_conversion:
converter: lmdb_text_recognition_database
lower_case: true
data_dir: ic03_867
annotation: icdar03_recognition.pickle
reader:
type: lmdb_reader
- name: SVT_recognition
data_source: svt_647
annotation_conversion:
converter: lmdb_text_recognition_database
lower_case: true
data_dir: svt_647
annotation: svt_recognition.pickle
reader:
type: lmdb_reader
- name: IIIT5K
data_source: IIIT5K_3000
annotation_conversion:
converter: lmdb_text_recognition_database
lower_case: true
data_dir: IIIT5K_3000
annotation: IIIT5K_3000.pickle
reader:
type: lmdb_reader
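# The four recognition datasets above (ICDAR2015, ICDAR03, SVT, IIIT5K) are
# stored as LMDB databases rather than plain image folders, hence the
# explicit reader: lmdb_reader and data_dir pointing at the database.
# lower_case: true presumably lower-cases the ground-truth text so matching
# is case-insensitive. (Descriptive comment, not part of the original file.)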
- name: ICDAR2015_word_spotting
data_source: ICDAR15_DET/ch4_test_images
annotation_conversion:
converter: icdar_detection
word_spotting: True
data_dir: ICDAR15_DET/gt_proper
annotation: icdar15_detection.pickle
- name: ICDAR2013_detection
data_source: ICDAR13_DET
annotation_conversion:
converter: icdar_detection
data_dir: ICDAR13_DET/gt
annotation: icdar13_detection.pickle
- name: ICDAR2013
data_source: ICDAR13_REC/Challenge2_Test_Task3_Images
annotation_conversion:
converter: icdar13_recognition
annotation_file: ICDAR13_REC/gt/gt.txt.fixed.alfanumeric
annotation: icdar13_recognition.pickle
dataset_meta: icdar13_recognition.json
- name: im2latex_medium_rendered
data_source: im2latex_medium_rendered/images_processed
annotation_conversion:
converter: im2latex_formula_recognition
data_dir: im2latex_medium_rendered
images_dir: im2latex_medium_rendered/images_processed
formula_file: im2latex_medium_rendered/formulas.norm.lst
split_file: im2latex_medium_rendered/validate_filter.lst
vocab_file: im2latex_medium_rendered/vocab.json
annotation: im2latex_medium_rendered.pickle
dataset_meta: im2latex_medium_rendered.json
- name: im2latex_medium_photographed
data_source: im2latex_medium_photographed/images_processed
annotation_conversion:
converter: im2latex_formula_recognition
data_dir: im2latex_medium_photographed
images_dir: im2latex_medium_photographed/images_processed
formula_file: im2latex_medium_photographed/formulas.norm.lst
split_file: im2latex_medium_photographed/test_filter.lst
vocab_file: im2latex_medium_photographed/vocab.pkl
annotation: im2latex_medium_photographed.pickle
dataset_meta: im2latex_medium_photographed.json
- name: im2latex_polynomials_handwritten
data_source: im2latex_polynomials_handwritten/images_processed
annotation_conversion:
converter: im2latex_formula_recognition
data_dir: im2latex_polynomials_handwritten
images_dir: im2latex_polynomials_handwritten/images_processed
formula_file: im2latex_polynomials_handwritten/formulas.norm.lst
split_file: im2latex_polynomials_handwritten/validate_filter.lst
vocab_file: im2latex_polynomials_handwritten/vocab.json
annotation: im2latex_polynomials_handwritten.pickle
dataset_meta: im2latex_polynomials_handwritten.json
- name: market1501
data_source: Market-1501-v15.09.15
annotation_conversion:
converter: market1501_reid
data_dir: Market-1501-v15.09.15
annotation: market1501_reid.pickle
- name: veri776
data_source: VeRi-776
annotation_conversion:
converter: veri776_reid
data_dir: VeRi-776
annotation: veri776_reid.pickle
- name: reid_dataset
data_source: Market-1501-v15.09.15
annotation_conversion:
converter: market1501_reid
data_dir: Market-1501-v15.09.15
annotation: market1501_reid.pickle
- name: vgg2face
data_source: VGGFaces2/test
annotation_conversion:
converter: vgg_face
landmarks_csv_file: VGGFaces2/bb_landmark/loose_landmark_test.csv
bbox_csv_file: VGGFaces2/bb_landmark/loose_bb_test.csv
annotation: vggfaces2.pickle
dataset_meta: vggfaces2.json
- name: semantic_segmentation_adas
data_source: segmentation/images
segmentation_masks_source: segmentation/mask_segmentation_adas
annotation_conversion:
converter: common_semantic_segmentation
images_dir: segmentation/images
masks_dir: segmentation/mask_segmentation_adas
image_postfix: .JPEG
mask_postfix: .png
dataset_meta: segmentation/mask_segmentation_adas/dataset_meta.json
annotation: semantic_segmentation_adas.pickle
dataset_meta: semantic_segmentation_adas.json
preprocessing:
- type: resize
dst_height: 1024
dst_width: 2048
postprocessing:
- type: encode_segmentation_mask
apply_to: annotation
- type: resize_segmentation_mask
apply_to: annotation
dst_height: 1024
dst_width: 2048
- name: road_segmentation
data_source: segmentation/images
segmentation_masks_source: segmentation/mask_road_segmentation
annotation_conversion:
converter: common_semantic_segmentation
images_dir: segmentation/images
masks_dir: segmentation/mask_road_segmentation
image_postfix: .JPEG
mask_postfix: .png
dataset_meta: segmentation/mask_road_segmentation/dataset_meta.json
annotation: road_segmentation.pickle
dataset_meta: road_segmentation.json
preprocessing:
- type: resize
dst_height: 512
dst_width: 896
postprocessing:
- type: encode_segmentation_mask
apply_to: annotation
- type: resize_segmentation_mask
apply_to: annotation
dst_height: 512
dst_width: 896
metrics:
- type: mean_iou
presenter: print_vector
- type: mean_accuracy
presenter: print_vector
- name: super_resolution_x3
data_source: super_resolution
annotation_conversion:
converter: super_resolution
data_dir: super_resolution
lr_suffix: lr_x3
upsample_suffix: upsample_x3
hr_suffix: hr
two_streams: True
annotation: super_resolution_x3.pickle
preprocessing:
- type: auto_resize
postprocessing:
- type: resize
apply_to: prediction
metrics:
- type: psnr
scale_border: 4
presenter: print_vector
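# two_streams: True appears to make the converter record two inputs per
# sample, the low-resolution image (lr_x3) and its pre-upsampled counterpart
# (upsample_x3), for models taking both streams; PSNR is then computed
# against the hr reference, with scale_border: 4 presumably excluding a
# 4-pixel border from the comparison. (Assumed semantics, added as a note.)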
- name: super_resolution_x4
data_source: super_resolution
annotation_conversion:
converter: super_resolution
data_dir: super_resolution
lr_suffix: lr_x4
upsample_suffix: upsample_x4
hr_suffix: hr
two_streams: True
annotation: super_resolution_x4.pickle
preprocessing:
- type: auto_resize
postprocessing:
- type: resize
apply_to: prediction
metrics:
- type: psnr
scale_border: 4
presenter: print_vector
- name: text_super_resolution_x3
data_source: super_resolution
annotation_conversion:
converter: super_resolution
data_dir: super_resolution
lr_suffix: lr_x3
hr_suffix: hr_gray
annotation: text_super_resolution_x3.pickle
preprocessing:
- type: bgr_to_gray
- type: auto_resize
postprocessing:
- type: resize
apply_to: prediction
metrics:
- type: psnr
scale_border: 4
presenter: print_vector
- name: head_pose
data_source: WIDER_val/images/16--Award_Ceremony
annotation: head_pose.pickle
preprocessing:
- type: crop_rect
- type: resize
size: 60
- name: gaze_estimation_dataset
data_source: gaze_estimation
annotation: gaze_estimation.pickle
reader:
type: combine_reader
scheme:
".*.png": opencv_imread
".*.json":
type: json_reader
key: head_pose_angles
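# combine_reader dispatches each input file by regular expression: files
# matching ".*.png" are read with opencv_imread, while ".*.json" files are
# parsed by json_reader and only the head_pose_angles field is used as an
# input. (Comment added; behaviour is read off the scheme keys above.)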
- name: handwritten_score_recognition
data_source: ILSVRC2012_img_val
annotation: handwritten_score_recognition.pickle
dataset_meta: handwritten_score_recognition.json
- name: cmu_panoptic_keypoints
data_source: cmu_panoptic_subset
annotation_conversion:
converter: cmu_panoptic_keypoints
data_dir: cmu_panoptic_subset
annotation: cmu_panoptic_keypoints.pickle
- name: kinetics-400
data_source: kinetics/frames_val
annotation_conversion:
converter: clip_action_recognition
annotation_file: kinetics/kinetics_400.json
data_dir: kinetics/frames_val
annotation: kinetics_action_recognition.pickle
dataset_meta: kinetics_action_recognition.json
- name: kinetics-400-frames-79
data_source: kinetics/frames_val
annotation_conversion:
converter: clip_action_recognition
annotation_file: kinetics/kinetics_400.json
data_dir: kinetics/frames_val
clip_duration: 79
annotation: kinetics_action_recognition_79.pickle
dataset_meta: kinetics_action_recognition_79.json
- name: kinetics-400-frames-79-400
data_source: kinetics/frames_val
annotation_conversion:
converter: clip_action_recognition
annotation_file: kinetics/kinetics_400.json
data_dir: kinetics/frames_val
clip_duration: 79
temporal_stride: 1
num_samples: 400
annotation: kinetics_action_recognition_79_400.pickle
dataset_meta: kinetics_action_recognition_79_400.json
- name: driver_action_recognition_dataset
data_source: kinetics/frames_val
annotation_conversion:
converter: clip_action_recognition
annotation_file: kinetics/driver_action_recognition.json
data_dir: kinetics/frames_val
annotation: driver_action_recognition.pickle
dataset_meta: driver_action_recognition.json
- name: BraTS
data_source: BraTS
reader: numpy_reader
annotation_conversion:
converter: brats_numpy
data_dir: BraTS
ids_file: BraTS/val_ids.p
labels_file: BraTS/labels
annotation: brats.pickle
dataset_meta: brats.json
- name: BraTS_2017
data_source: BraTS_2017
reader:
type: nifti_reader
channels_first: True
annotation_conversion:
converter: brats
data_dir: BraTS_2017
labels_file: BraTS_2017/labels
mask_channels_first: True
annotation: brats2017.pickle
dataset_meta: brats2017.json
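# Both BraTS entries swap the default image reader for a volumetric one:
# BraTS expects preprocessed NumPy arrays (numpy_reader), while BraTS_2017
# reads NIfTI volumes with channels_first layout and masks converted with
# mask_channels_first: True. (Descriptive comment inferred from the reader
# and converter options above.)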
- name: product_detection
annotation: product_detection.pickle
dataset_meta: product_detection.json
data_source: product_detection
metrics:
- type: coco_precision
- name: ms_coco_single_keypoints
data_source: val2017
annotation_conversion:
converter: mscoco_single_keypoints
annotation_file: person_keypoints_val2017.json
annotation: mscoco_single_keypoints.pickle
dataset_meta: mscoco_single_keypoints.json
metrics:
- name: AP
type: coco_orig_keypoints_precision
- name: CamVid
data_source: CamVid
annotation_conversion:
converter: camvid
annotation_file: CamVid/val.txt
annotation: camvid.pickle
dataset_meta: camvid.json
- name: msasl-100
data_source: msasl/global_crops
annotation_conversion:
converter: continuous_clip_action_recognition
annotation_file: msasl/msasl_100.txt
data_dir: msasl/global_crops
out_fps: 15
clip_length: 16
annotation: msasl_action_recognition.pickle
- name: jester
data_source: jester/global_crops
annotation_conversion:
converter: continuous_clip_action_recognition
annotation_file: jester/val.txt
data_dir: jester/global_crops
out_fps: 15
clip_length: 8
annotation: jester_action_recognition.pickle
- name: common_sign_language_gestures
data_source: common_sign_language_gestures/global_crops
annotation_conversion:
converter: continuous_clip_action_recognition
annotation_file: common_sign_language_gestures/test.txt
data_dir: common_sign_language_gestures/global_crops
out_fps: 15
clip_length: 8
img_prefix: ''
annotation: common_sign_language_recognition.pickle
- name: ReDWeb_V1
data_source: ReDWeb_V1
annotation_conversion:
converter: redweb
data_dir: ReDWeb_V1
annotation_file: ReDWeb_validation_360.txt