Skip to content

Latest commit

 

History

History
384 lines (297 loc) · 9 KB

coco.md

File metadata and controls

384 lines (297 loc) · 9 KB

COCO Utilities

COCO dataset creation:
  • import required classes:
from sahi.utils.coco import Coco, CocoCategory, CocoImage, CocoAnnotation
  • init Coco object:
coco = Coco()
  • add categories starting from id 0:
coco.add_category(CocoCategory(id=0, name='human'))
coco.add_category(CocoCategory(id=1, name='vehicle'))
  • create a coco image:
coco_image = CocoImage(file_name="image1.jpg", height=1080, width=1920)
  • add annotations to coco image:
coco_image.add_annotation(
  CocoAnnotation(
    bbox=[x_min, y_min, width, height],
    category_id=0,
    category_name='human'
  )
)
coco_image.add_annotation(
  CocoAnnotation(
    bbox=[x_min, y_min, width, height],
    category_id=1,
    category_name='vehicle'
  )
)
  • add coco image to Coco object:
coco.add_image(coco_image)
  • after adding all images, convert coco object to coco json:
coco_json = coco.json
  • you can export it as json file:
from sahi.utils.file import save_json

save_json(coco_json, "coco_dataset.json")
Slice COCO dataset images and annotations into grids:
from sahi.slicing import slice_coco

coco_dict, coco_path = slice_coco(
    coco_annotation_file_path="coco.json",
    image_dir="source/coco/image/dir",
    slice_height=256,
    slice_width=256,
    overlap_height_ratio=0.2,
    overlap_width_ratio=0.2,
)
Split COCO dataset into train/val:
from sahi.utils.coco import Coco
from sahi.utils.file import save_json

# specify coco dataset path
coco_path = "coco.json"

# init Coco object
coco = Coco.from_coco_dict_or_path(coco_path)

# split COCO dataset with a 85% train/15% val split
result = coco.split_coco_as_train_val(
  train_split_rate=0.85
)

# export train val split files
save_json(result["train_coco"].json, "train_split.json")
save_json(result["val_coco"].json, "val_split.json")
Filter/Update COCO dataset by categories:
from sahi.utils.coco import Coco
from sahi.utils.file import save_json

# init Coco object by specifying the coco dataset path
coco = Coco.from_coco_dict_or_path("coco.json")

# select only 3 categories; and map them to ids 1, 2 and 3
desired_name2id = {
  "big_vehicle": 1,
  "car": 2,
  "human": 3
}
coco.update_categories(desired_name2id)

# export updated/filtered COCO dataset
save_json(coco.json, "updated_coco.json")
Filter COCO dataset by annotation area:
from sahi.utils.coco import Coco
from sahi.utils.file import save_json

# init Coco object by specifying the coco dataset path
coco = Coco.from_coco_dict_or_path("coco.json")

# filter out images that contain annotations with smaller area than 50
area_filtered_coco = coco.get_area_filtered_coco(min=50)
# filter out images that contain annotations with smaller area than 50 or larger area than 10000
area_filtered_coco = coco.get_area_filtered_coco(min=50, max=10000)
# filter out images with separate area intervals per category
intervals_per_category = {
  "human": {"min": 20, "max": 10000},
  "vehicle": {"min": 50, "max": 15000},
}
area_filtered_coco = coco.get_area_filtered_coco(intervals_per_category=intervals_per_category)

# export filtered COCO dataset
save_json(area_filtered_coco.json, "area_filtered_coco.json")
Filter out images that do not contain any annotation:
from sahi.utils.coco import Coco

# set ignore_negative_samples to False if you want images without annotations to be present in json and yolov5 exports
coco = Coco.from_coco_dict_or_path("coco.json", ignore_negative_samples=False)
Merge COCO dataset files:
from sahi.utils.coco import Coco
from sahi.utils.file import save_json

# init Coco objects by specifying coco dataset paths and image folder directories
coco_1 = Coco.from_coco_dict_or_path("coco1.json", image_dir="images_1/")
coco_2 = Coco.from_coco_dict_or_path("coco2.json", image_dir="images_2/")

# merge Coco datasets
coco_1.merge(coco_2)

# export merged COCO dataset
save_json(coco_1.json, "merged_coco.json")
Convert COCO dataset to ultralytics/yolov5 format:
from sahi.utils.coco import Coco

# init Coco object
coco = Coco.from_coco_dict_or_path("coco.json", image_dir="coco_images/")

# export converted YoloV5 formatted dataset into given output_dir with a 85% train/15% val split
coco.export_as_yolov5(
  output_dir="output/folder/dir",
  train_split_rate=0.85
)
Convert train/val COCO dataset to ultralytics/yolov5 format:
from sahi.utils.coco import Coco, export_coco_as_yolov5

# init Coco object
train_coco = Coco.from_coco_dict_or_path("train_coco.json", image_dir="coco_images/")
val_coco = Coco.from_coco_dict_or_path("val_coco.json", image_dir="coco_images/")

# export converted YoloV5 formatted dataset into given output_dir with given train/val split
data_yml_path = export_coco_as_yolov5(
  output_dir="output/folder/dir",
  train_coco=train_coco,
  val_coco=val_coco
)
Subsample COCO dataset file:
from sahi.utils.coco import Coco
from sahi.utils.file import save_json

# specify coco dataset path
coco_path = "coco.json"

# init Coco object
coco = Coco.from_coco_dict_or_path(coco_path)

# create a Coco object with 1/10 of total images
subsampled_coco = coco.get_subsampled_coco(subsample_ratio=10)

# export subsampled COCO dataset
save_json(subsampled_coco.json, "subsampled_coco.json")

# bonus: create a Coco object with 1/10 of total images that contain first category
subsampled_coco = coco.get_subsampled_coco(subsample_ratio=10, category_id=0)

# bonus2: create a Coco object with negative samples reduced to 1/10
subsampled_coco = coco.get_subsampled_coco(subsample_ratio=10, category_id=-1)
Upsample COCO dataset file:
from sahi.utils.coco import Coco
from sahi.utils.file import save_json

# specify coco dataset path
coco_path = "coco.json"

# init Coco object
coco = Coco.from_coco_dict_or_path(coco_path)

# create a Coco object in which each sample is repeated 10 times
upsampled_coco = coco.get_upsampled_coco(upsample_ratio=10)

# export upsampled COCO dataset
save_json(upsampled_coco.json, "upsampled_coco.json")

# bonus: create a Coco object with images that contain first category repeated 10 times
upsampled_coco = coco.get_upsampled_coco(upsample_ratio=10, category_id=0)

# bonus2: create a Coco object with negative samples upsampled 10 times
upsampled_coco = coco.get_upsampled_coco(upsample_ratio=10, category_id=-1)
Get dataset stats:
from sahi.utils.coco import Coco

# init Coco object
coco = Coco.from_coco_dict_or_path("coco.json")

# get dataset stats
coco.stats
{
  'num_images': 6471,
  'num_annotations': 343204,
  'num_categories': 2,
  'num_negative_images': 0,
  'num_images_per_category': {'human': 5684, 'vehicle': 6323},
  'num_annotations_per_category': {'human': 106396, 'vehicle': 236808},
  'min_num_annotations_in_image': 1,
  'max_num_annotations_in_image': 902,
  'avg_num_annotations_in_image': 53.037243084530985,
  'min_annotation_area': 3,
  'max_annotation_area': 328640,
  'avg_annotation_area': 2448.405738278109,
  'min_annotation_area_per_category': {'human': 3, 'vehicle': 3},
  'max_annotation_area_per_category': {'human': 72670, 'vehicle': 328640},
}
Remove invalid coco results:
from sahi.utils.file import save_json
from sahi.utils.coco import remove_invalid_coco_results

# remove invalid predictions from COCO results JSON
coco_results = remove_invalid_coco_results("coco_result.json")

# export processed COCO results
save_json(coco_results, "fixed_coco_result.json")

# bonus: remove invalid predictions from COCO results JSON by giving COCO
# dataset path to also filter out bbox results exceeding image height&width
coco_results = remove_invalid_coco_results("coco_result.json", "coco_dataset.json")
Get COCO with clipped bounding boxes:
  • import required classes:
from sahi.utils.coco import Coco
from sahi.utils.file import save_json

Usage:

# Clip overflowing bounding boxes to image width & height
coco = Coco.from_coco_dict_or_path(coco_path, clip_bboxes_to_img_dims=True)

or,

# apply to your already created coco object
coco = coco.get_coco_with_clipped_bboxes()
  • Export your clipped_bboxed_coco:
save_json(coco.json, "coco.json")