Skip to content

Commit

Permalink
support recursive search dirs (#38)
Browse files Browse the repository at this point in the history
* support recursive search dirs

* fix pylint error
  • Loading branch information
GreatV authored Dec 19, 2023
1 parent dba6184 commit cc4171e
Show file tree
Hide file tree
Showing 3 changed files with 109 additions and 130 deletions.
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -1,3 +1,4 @@
opencv-python
Pillow
numpy
tqdm
2 changes: 1 addition & 1 deletion src/labelme2yolo/__about__.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@
'''
about version
'''
__version__ = '0.1.3'
__version__ = '0.1.4'
236 changes: 107 additions & 129 deletions src/labelme2yolo/l2y.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,26 +5,30 @@
@author: GreatV(Wang Xin)
"""
import base64
import glob
import io
import json
import math
import os
import random
import shutil
from collections import OrderedDict
from multiprocessing import Pool
import uuid
import logging

import PIL.ExifTags
import PIL.Image
import PIL.ImageOps
import cv2
import numpy as np
import tqdm

# set seed
random.seed(12345678)
random.Random().seed(12345678)
np.random.seed(12345678)

# number of LabelMe2YOLO multiprocessing threads
NUM_THREADS = max(1, os.cpu_count() - 1)
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger("labelme2yolo")


def train_test_split(dataset_index, test_size=0.2):
Expand Down Expand Up @@ -97,21 +101,6 @@ def img_data_to_png_data(img_data):
return f_in.read()


def get_label_id_map(json_dir: str):
    """Build a label -> id map from the LabelMe JSON files directly in json_dir.

    Args:
        json_dir: Directory whose ``*.json`` files are scanned. Sub-directories
            are not visited.

    Returns:
        An ``OrderedDict`` mapping every distinct shape label to an integer id.
        Ids follow sorted label order, so they are the same on every run.
    """
    label_set = set()

    for file_name in sorted(os.listdir(json_dir)):
        # Require the real ".json" extension; a bare "json" suffix would also
        # accept names such as "notesjson".
        if not file_name.endswith(".json"):
            continue
        json_path = os.path.join(json_dir, file_name)
        with open(json_path, encoding="utf-8") as file:
            data = json.load(file)
        label_set.update(shape["label"] for shape in data["shapes"])

    # Enumerating the raw set would follow string-hash order, which differs
    # between interpreter runs; sorting pins each label to a stable id.
    return OrderedDict(
        (label, label_id)
        for label_id, label in enumerate(sorted(label_set, key=str))
    )


def extend_point_list(point_list, out_format="polygon"):
"""Extend point list to polygon or bbox"""
x_min = min(float(point) for point in point_list[::2])
Expand All @@ -131,32 +120,24 @@ def extend_point_list(point_list, out_format="polygon"):
return np.array([x_min, y_min, x_max, y_min, x_max, y_max, x_min, y_max])


def save_yolo_label(obj_list, label_dir, target_dir, target_name):
    """Write YOLO annotation lines to ``label_dir/target_dir/target_name``.

    Args:
        obj_list: Iterable of YOLO objects. Each is either
            ``(label, points)`` with ``points`` an iterable of values, or a
            flat ``(label, v0, v1, ...)`` tuple (the shape returned for
            circles).
        label_dir: Root directory of the label files.
        target_dir: Sub-directory below ``label_dir``; may be ``""``.
        target_name: Name of the text file to write.
    """
    txt_path = os.path.join(label_dir, target_dir, target_name)

    lines = []
    for label, *rest in obj_list:
        # Two-name unpacking would raise ValueError on the flat circle tuple,
        # so accept both layouts and normalise to one list of values.
        nested = (
            len(rest) == 1
            and hasattr(rest[0], "__iter__")
            and not isinstance(rest[0], (str, bytes))
        )
        points = rest[0] if nested else rest
        lines.append(f"{label} {' '.join(str(item) for item in points)}\n")

    # Plain "w" is enough: the file is written, never read back.
    with open(txt_path, "w", encoding="utf-8") as file:
        file.writelines(lines)


def save_yolo_image(json_data, json_path, image_dir_path, target_dir):
def save_yolo_image(json_data, json_dir, image_dir, target_dir, target_name):
"""Save yolo image to image_dir_path/target_dir"""
json_name = os.path.basename(json_path)
img_name = json_name.replace(".json", ".png")

# make image_path and save image
img_path = os.path.join(image_dir_path, target_dir, img_name)
img_path = os.path.join(image_dir, target_dir, target_name)

if json_data["imageData"] is None:
dirname = os.path.dirname(json_path)
image_name = json_data["imagePath"]
src_image_name = os.path.join(dirname, image_name)
src_image_name = os.path.join(json_dir, image_name)
src_image = cv2.imread(src_image_name)
cv2.imwrite(img_path, src_image)
else:
Expand All @@ -170,31 +151,36 @@ class Labelme2YOLO:
"""Labelme to YOLO format converter"""

def __init__(self, json_dir, output_format, label_list):
    """Set up the converter state.

    Args:
        json_dir: Directory holding the LabelMe JSON files; a leading ``~``
            is expanded.
        output_format: Stored as given; other methods of the class compare
            it against ``"polygon"`` and ``"bbox"``.
        label_list: Optional iterable of label names whose positions become
            the class ids. When falsy, the label list and id map start empty.
    """
    self._json_dir = os.path.expanduser(json_dir)
    self._output_format = output_format
    self._label_dir_path = ""
    self._image_dir_path = ""

    # Copy so later appends never mutate the caller's list, and drop
    # duplicates (keeping first occurrence) so ids stay contiguous from 0.
    self._label_list = list(dict.fromkeys(label_list)) if label_list else []
    self._label_id_map = {
        label: label_id for label_id, label in enumerate(self._label_list)
    }

def _update_id_map(self, label: str):
    """Register ``label`` with the next free class id if it is not known yet.

    Args:
        label: Label name read from a shape.
    """
    if label in self._label_id_map:
        return

    if label not in self._label_list:
        self._label_list.append(label)
    # Take the id after the largest one in use rather than len(map): the two
    # agree while ids are contiguous, but len(map) could repeat an existing
    # id if they are not.
    self._label_id_map[label] = max(self._label_id_map.values(), default=-1) + 1

def _make_train_val_dir(self):
self._label_dir_path = os.path.join(self._json_dir,
'YOLODataset/labels/')
self._image_dir_path = os.path.join(self._json_dir,
'YOLODataset/images/')

for yolo_path in (os.path.join(self._label_dir_path + 'train/'),
os.path.join(self._label_dir_path + 'val/'),
os.path.join(self._label_dir_path + 'test/'),
os.path.join(self._image_dir_path + 'train/'),
os.path.join(self._image_dir_path + 'val/'),
os.path.join(self._image_dir_path + 'test/')):
self._label_dir_path = os.path.join(self._json_dir, "YOLODataset/labels/")
self._image_dir_path = os.path.join(self._json_dir, "YOLODataset/images/")

for yolo_path in (
os.path.join(self._label_dir_path + "train/"),
os.path.join(self._label_dir_path + "val/"),
os.path.join(self._label_dir_path + "test/"),
os.path.join(self._image_dir_path + "train/"),
os.path.join(self._image_dir_path + "val/"),
os.path.join(self._image_dir_path + "test/"),
):
if os.path.exists(yolo_path):
shutil.rmtree(yolo_path)

Expand All @@ -207,31 +193,21 @@ def _get_dataset_part_json_names(self, dataset_part: str):
for sample_name in os.listdir(set_folder):
set_dir = os.path.join(set_folder, sample_name)
if os.path.isdir(set_dir):
json_names.append(sample_name + '.json')
json_names.append(sample_name + ".json")
return json_names

def _train_test_split(self, folders, json_names, val_size, test_size):
def _train_test_split(self, json_names, val_size, test_size):
"""Split json names to train, val, test"""
if (len(folders) > 0 and
'train' in folders and
'val' in folders and
'test' in folders):
train_json_names = self._get_dataset_part_json_names('train')
val_json_names = self._get_dataset_part_json_names('val')
test_json_names = self._get_dataset_part_json_names('test')

return train_json_names, val_json_names, test_json_names

total_size = len(json_names)
dataset_index = list(range(total_size))
train_ids, val_ids = train_test_split(dataset_index,
test_size=val_size)
train_ids, val_ids = train_test_split(dataset_index, test_size=val_size)
test_ids = []
if test_size is None:
test_size = 0.0
if test_size > 1e-8:
train_ids, test_ids = train_test_split(
train_ids, test_size=test_size / (1 - val_size))
train_ids, test_size=test_size / (1 - val_size)
)
train_json_names = [json_names[train_idx] for train_idx in train_ids]
val_json_names = [json_names[val_idx] for val_idx in val_ids]
test_json_names = [json_names[test_idx] for test_idx in test_ids]
Expand All @@ -240,64 +216,56 @@ def _train_test_split(self, folders, json_names, val_size, test_size):

def convert(self, val_size, test_size):
"""Convert labelme format to yolo format"""
json_names = [file_name for file_name in os.listdir(self._json_dir)
if os.path.isfile(os.path.join(self._json_dir, file_name)) and
file_name.endswith('.json')]
folders = [file_name for file_name in os.listdir(self._json_dir)
if os.path.isdir(os.path.join(self._json_dir, file_name))]
json_names = glob.glob(
os.path.join(self._json_dir, "**", "*.json"), recursive=True
)
json_names = sorted(json_names)
train_json_names, val_json_names, test_json_names = self._train_test_split(
folders, json_names, val_size, test_size)
json_names, val_size, test_size
)

self._make_train_val_dir()

# convert labelme object to yolo format object, and save them to files
# also get image from labelme json file and save them under images folder
dirs = ('train/', 'val/', 'test/')
dirs = ("train/", "val/", "test/")
names = (train_json_names, val_json_names, test_json_names)
for target_dir, json_names in zip(dirs, names):
target_part = target_dir.replace("/", "")
logger.info("Converting %s set ...", target_part)
for json_name in tqdm.tqdm(json_names):
self.covert_json_to_text(target_dir, json_name)

with Pool(NUM_THREADS) as pool:
for json_name in json_names:
pool.apply_async(self.covert_json_to_text,
args=(target_dir, json_name))
pool.close()
pool.join()

print('Generating dataset.yaml file ...')
self._save_dataset_yaml()

def covert_json_to_text(self, target_dir, json_name):
    """Convert one LabelMe JSON file into a YOLO image/label file pair.

    Args:
        target_dir: Split sub-directory the outputs go to (``"train/"``,
            ``"val/"`` or ``"test/"``).
        json_name: Path of the LabelMe JSON file to convert.
    """
    with open(json_name, encoding="utf-8") as file:
        json_data = json.load(file)

    # A fresh Random() is not tied to the module-level seed, so each call
    # gets a new 128-bit name. Presumably this avoids clashes between files
    # with the same base name in different sub-folders.
    filename: str = uuid.UUID(int=random.Random().getrandbits(128)).hex
    image_name = f"{filename}.png"
    label_name = f"{filename}.txt"

    # The image path in the JSON is joined onto the directory passed here.
    # The recursive search can find JSON files in sub-folders, so pass the
    # JSON file's own folder rather than the search root.
    img_path = save_yolo_image(
        json_data,
        os.path.dirname(json_name),
        self._image_dir_path,
        target_dir,
        image_name,
    )
    yolo_obj_list = self._get_yolo_object_list(json_data, img_path)
    save_yolo_label(yolo_obj_list, self._label_dir_path, target_dir, label_name)

def convert_one(self, json_name):
"""Convert one json file to yolo format text file and save them to files"""
json_path = os.path.join(self._json_dir, json_name)
with open(json_path, encoding="utf-8") as file:
json_data = json.load(file)

print(f'Converting {json_name} ...')

img_path = save_yolo_image(json_data, json_name,
self._json_dir, '')
image_name = json_name.replace(".json", ".png")
label_name = json_name.replace(".json", ".txt")
img_path = save_yolo_image(
json_data, self._json_dir, self._image_dir_path, "", image_name
)

yolo_obj_list = self._get_yolo_object_list(json_data, img_path)
save_yolo_label(json_name, self._json_dir,
'', yolo_obj_list)
save_yolo_label(yolo_obj_list, self._label_dir_path, "", label_name)

def _get_yolo_object_list(self, json_data, img_path):
yolo_obj_list = []
Expand All @@ -306,22 +274,23 @@ def _get_yolo_object_list(self, json_data, img_path):
for shape in json_data["shapes"]:
# labelme circle shape is different from others
# it only has 2 points, 1st is circle center, 2nd is drag end point
if shape['shape_type'] == 'circle':
yolo_obj = self._get_circle_shape_yolo_object(
shape, img_h, img_w)
if shape["shape_type"] == "circle":
yolo_obj = self._get_circle_shape_yolo_object(shape, img_h, img_w)
else:
yolo_obj = self._get_other_shape_yolo_object(
shape, img_h, img_w)
yolo_obj = self._get_other_shape_yolo_object(shape, img_h, img_w)

yolo_obj_list.append(yolo_obj)
if yolo_obj:
yolo_obj_list.append(yolo_obj)

return yolo_obj_list

def _get_circle_shape_yolo_object(self, shape, img_h, img_w):
obj_center_x, obj_center_y = shape['points'][0]
obj_center_x, obj_center_y = shape["points"][0]

radius = math.sqrt((obj_center_x - shape['points'][1][0]) ** 2 +
(obj_center_y - shape['points'][1][1]) ** 2)
radius = math.sqrt(
(obj_center_x - shape["points"][1][0]) ** 2
+ (obj_center_y - shape["points"][1][1]) ** 2
)
obj_w = 2 * radius
obj_h = 2 * radius

Expand All @@ -330,46 +299,55 @@ def _get_circle_shape_yolo_object(self, shape, img_h, img_w):
yolo_w = round(float(obj_w / img_w), 6)
yolo_h = round(float(obj_h / img_h), 6)

if shape['label'] in self._label_id_map:
label_id = self._label_id_map[shape['label']]
if shape["label"]:
label = shape["label"]
if label not in self._label_list:
self._update_id_map(label)
label_id = self._label_id_map[shape["label"]]

return label_id, yolo_center_x, yolo_center_y, yolo_w, yolo_h

raise f"label {shape['label']} not in {self._label_list}"
return None

def _get_other_shape_yolo_object(self, shape, img_h, img_w):

point_list = shape['points']
point_list = shape["points"]
points = np.zeros(2 * len(point_list))
points[::2] = [float(point[0]) / img_w for point in point_list]
points[1::2] = [float(point[1]) / img_h for point in point_list]

if len(points) == 4:
if self._output_format == "polygon":
points = extend_point_list(points)
if self._output_format == "bbox":
points = extend_point_list(points, "bbox")

if shape['label'] in self._label_id_map:
label_id = self._label_id_map[shape['label']]
if shape["label"]:
label = shape["label"]
if label not in self._label_list:
self._update_id_map(label)
label_id = self._label_id_map[shape["label"]]

return label_id, points.tolist()

raise f"label {shape['label']} not in {self._label_list}"
return None

def _save_dataset_yaml(self):
    """Write ``YOLODataset/dataset.yaml`` below the JSON directory.

    The file lists the train/val/test image directories, the class count
    (``nc``) and the class names in the iteration order of the label-id map.
    """
    yaml_path = os.path.join(self._json_dir, "YOLODataset/", "dataset.yaml")

    train_dir = os.path.join(self._image_dir_path, "train/")
    val_dir = os.path.join(self._image_dir_path, "val/")
    test_dir = os.path.join(self._image_dir_path, "test/")

    # json.dumps yields a double-quoted string with quotes and backslashes
    # escaped, so a label containing either still gives a valid YAML entry.
    # Plain labels come out exactly as before: "label".
    names_str = ", ".join(
        json.dumps(str(label), ensure_ascii=False) for label in self._label_id_map
    )

    content = (
        f"train: {train_dir}\nval: {val_dir}\ntest: {test_dir}\n"
        f"nc: {len(self._label_id_map)}\n"
        f"names: [{names_str}]"
    )

    with open(yaml_path, "w", encoding="utf-8") as yaml_file:
        yaml_file.write(content)

0 comments on commit cc4171e

Please sign in to comment.