-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathvisdrone.py
100 lines (82 loc) · 3.76 KB
/
visdrone.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
import os

import numpy as np
from PIL import Image

from mrcnn.config import Config
import mrcnn.utils as utils
### Configurations
class VisDroneConfig(Config):
    """Mask R-CNN configuration for the VisDrone dataset.

    Subclasses the base ``Config`` and overrides only the settings listed
    below; every other setting keeps the base-class value.
    """

    # Name under which this configuration is identified.
    NAME = "VisDrone"

    # One GPU with one image per GPU. Per the base convention the batch size
    # is GPU_COUNT * IMAGES_PER_GPU, i.e. 1 with these values.
    GPU_COUNT = 1
    IMAGES_PER_GPU = 1

    # Total number of classes, background included. Evaluates to 12.
    # NOTE(review): VisDroneDataset registers 10 object classes, so
    # background + 10 would be 11 -- confirm what the extra slot is
    # reserved for before changing this value.
    NUM_CLASSES = 2 + 10

    # Image resizing strategy. Available modes:
    #   none:   No resizing or padding; the image is returned unchanged.
    #   square: Resize and zero-pad to a square of size [max_dim, max_dim].
    #   pad64:  Zero-pad width and height up to multiples of 64. If
    #           IMAGE_MIN_DIM or IMAGE_MIN_SCALE is not None the image is
    #           scaled up before padding; IMAGE_MAX_DIM is ignored. The
    #           multiple of 64 keeps feature maps scaling cleanly up and down
    #           the 6 levels of the FPN pyramid (2**6 = 64).
    #   crop:   Scale the image using IMAGE_MIN_DIM and IMAGE_MIN_SCALE, then
    #           take a random IMAGE_MIN_DIM x IMAGE_MIN_DIM crop. Training
    #           only; IMAGE_MAX_DIM is not used.
    IMAGE_RESIZE_MODE = "square"

    # Limits for the short and long image sides. In "square" mode the result
    # is a 2560 x 2560 image.
    IMAGE_MIN_DIM = 1600
    IMAGE_MAX_DIM = 2560

    # Six stride values, one per feature level.
    BACKBONE_STRIDES = [4, 8, 16, 32, 64, 128]

    # Anchor side lengths in pixels, listed largest first. The ascending
    # ordering (8, 16, 32, 64, 128, 256) was previously tried and disabled.
    # NOTE(review): confirm how each scale is paired with BACKBONE_STRIDES.
    RPN_ANCHOR_SCALES = (256, 128, 64, 32, 16, 8)

    # Width/height ratios of the anchors generated at each location.
    RPN_ANCHOR_RATIOS = [0.5, 1, 2]

    # Number of ROIs sampled per image during training.
    TRAIN_ROIS_PER_IMAGE = 100

    # Training steps per epoch, and validation steps run per epoch.
    STEPS_PER_EPOCH = 3000
    VALIDATION_STEPS = 100

    # Detections scoring below this confidence are skipped.
    DETECTION_MIN_CONFIDENCE = 0.9
### Dataset
class VisDroneDataset(utils.Dataset):
    """Registers VisDrone images/classes and reads their annotation files."""

    # Object class names; the position in this tuple (starting at 1) is the
    # class id passed to ``add_class``.
    _CLASS_NAMES = (
        "pedestrian",
        "people",
        "bicycle",
        "car",
        "van",
        "truck",
        "tricycle",
        "awning-tricycle",
        "bus",
        "motor",
    )

    # Number of comma-separated fields expected on each annotation line:
    # x, y, w, h, score, cls, truncation, occlusion.
    _ANNO_FIELDS = 8

    # TaylorMei
    def load_VisDrone(self, count, img_folder, imglist, dataset_path):
        """Register the VisDrone classes and the first ``count`` images.

        Args:
            count: Number of entries of ``imglist`` to register. Entries are
                read by index, so it must not exceed ``len(imglist)``.
            img_folder: Directory holding the image files.
            imglist: Indexable sequence of image file names.
            dataset_path: Dataset root; the annotation of an image is looked
                up at ``<dataset_path>/annotations/<image stem>.txt``.
        """
        for class_id, class_name in enumerate(self._CLASS_NAMES, start=1):
            self.add_class("VisDrone", class_id, class_name)

        for i in range(count):
            file_name = imglist[i]
            # splitext removes only the final extension, so file names that
            # contain additional dots keep their full stem.
            image_name = os.path.splitext(file_name)[0]
            anno_path = dataset_path + "/annotations/" + image_name + ".txt"
            self.add_image(
                "VisDrone",
                image_id=i,
                path=img_folder + "/" + file_name,
                anno_path=anno_path,
            )

    # TaylorMei
    def load_anno(self, image_id):
        """Read the annotation file registered for ``image_id``.

        Each non-blank line must hold eight comma-separated fields:
        ``x, y, w, h, score, cls, truncation, occlusion``. Fields are
        returned as strings, with surrounding whitespace (including the line
        terminator) removed from the line. Blank lines are skipped and a
        single trailing comma after the eighth field is tolerated.

        Args:
            image_id: Index into ``self.image_info``.

        Returns:
            A ``(label, count)`` pair, where ``label`` is a list of 8-tuples
            of strings and ``count`` is ``len(label)``.

        Raises:
            ValueError: If a non-blank line does not contain exactly eight
                fields.
        """
        anno_path = self.image_info[image_id]["anno_path"]
        label = []
        with open(anno_path, "r") as label_txt:
            for line_no, raw_line in enumerate(label_txt, start=1):
                line = raw_line.strip()
                if not line:
                    # e.g. an empty line at the end of the file
                    continue
                fields = line.split(",")
                # A trailing comma yields one extra empty field; drop it.
                if len(fields) == self._ANNO_FIELDS + 1 and fields[-1] == "":
                    fields.pop()
                if len(fields) != self._ANNO_FIELDS:
                    raise ValueError(
                        "%s:%d: expected %d comma-separated fields, got %d"
                        % (anno_path, line_no, self._ANNO_FIELDS, len(fields))
                    )
                label.append(tuple(fields))
        return label, len(label)