-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathxml2yolo.py
120 lines (97 loc) · 3.94 KB
/
xml2yolo.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
#!/usr/bin/env python
# -*- coding: utf8 -*-
import os
import sys
from xml.etree import ElementTree
from xml.etree.ElementTree import Element, SubElement
from lxml import etree
import codecs
import cv2
from glob import glob
XML_EXT = '.xml'
ENCODE_METHOD = 'utf-8'
class PascalVocReader:
def __init__(self, filepath):
# shapes type:
# [labbel, [(x1,y1), (x2,y2), (x3,y3), (x4,y4)], color, color, difficult]
self.shapes = []
self.filepath = filepath
self.verified = False
try:
self.parseXML()
except:
pass
def getShapes(self):
return self.shapes
def addShape(self, label, bndbox, filename, difficult):
xmin = float(bndbox.find('xmin').text)
ymin = float(bndbox.find('ymin').text)
xmax = float(bndbox.find('xmax').text)
ymax = float(bndbox.find('ymax').text)
points = [(xmin, ymin), (xmax, ymin), (xmax, ymax), (xmin, ymax)]
self.shapes.append((label, points, filename, difficult))
def parseXML(self):
assert self.filepath.endswith(XML_EXT), "Unsupport file format"
parser = etree.XMLParser(encoding=ENCODE_METHOD)
xmltree = ElementTree.parse(self.filepath, parser=parser).getroot()
filename = xmltree.find('filename').text
path = xmltree.find('path').text
try:
verified = xmltree.attrib['verified']
if verified == 'yes':
self.verified = True
except KeyError:
self.verified = False
for object_iter in xmltree.findall('object'):
bndbox = object_iter.find("bndbox")
label = object_iter.find('name').text
# Add chris
difficult = False
if object_iter.find('difficult') is not None:
difficult = bool(int(object_iter.find('difficult').text))
self.addShape(label, bndbox, path, difficult)
return True
classes = dict()
num_classes = 0
try:
input = raw_input
except NameError:
pass
parentpath = './' #"Directory path with parent dir before xml_dir or img_dir"
addxmlpath = parentpath + 'traffic_label/201702071403' #"Directory path with XML files"
addimgpath = parentpath + 'traffic_img/201702071403' #"Directory path with IMG files"
outputpath = parentpath + 'traffic_format4yolo' #"output folder for yolo format"
classes_txt = './traffic_classes.txt' #"File containing classes"
ext = '.png' #"Image file extension [.jpg or .png]"
if os.path.isfile(classes_txt):
with open(classes_txt, "r") as f:
class_list = f.read().strip().split()
classes = {k : v for (v, k) in enumerate(class_list)}
xmlPaths = glob(addxmlpath + "/*.xml")
#imgPaths = glob(addimgpath + "/*"+ext)
for xmlPath in xmlPaths:
tVocParseReader = PascalVocReader(xmlPath)
shapes = tVocParseReader.getShapes()
with open(outputpath + "/" + os.path.basename(xmlPath)[:-4] + ".txt", "w") as f:
for shape in shapes:
class_name = shape[0]
box = shape[1]
#filename = os.path.splittext(xmlPath)[0] + ext
filename = os.path.splitext(addimgpath + "/" + os.path.basename(xmlPath)[:-4])[0] + ext
if class_name not in classes.keys():
classes[class_name] = num_classes
num_classes += 1
class_idx = classes[class_name]
(height, width, _) = cv2.imread(filename).shape
coord_min = box[0]
coord_max = box[2]
xcen = float((coord_min[0] + coord_max[0])) / 2 / width
ycen = float((coord_min[1] + coord_max[1])) / 2 / height
w = float((coord_max[0] - coord_min[0])) / width
h = float((coord_max[1] - coord_min[1])) / height
f.write("%d %.06f %.06f %.06f %.06f\n" % (class_idx, xcen, ycen, w, h))
print(class_idx, xcen, ycen, w, h)
with open(parentpath + "classes.txt", "w") as f:
for key in classes.keys():
f.write("%s\n" % key)
print(key)