-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathmake_dateset.py
105 lines (90 loc) · 3.85 KB
/
make_dateset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
#!/usr/bin/python3
# -*- coding: utf-8 -*-
# @Time : 01/04/21
# @Author : YangShiMin
# @Email : [email protected]
# @File : make_dateset.py
# @Software: PyCharm
import os
import cv2
import numpy as np
from config import CORNER_IMAGE_DIR, POINT_IMAGE_DIR, SINGLE_CHINESE_IMAGE_DIR
from utils import opencv_read_image, opencv_write_image
from yolo.mode_one import run_click
from logger.get_logger import logger
def save_image_corner(image_path, corner_out_dir, title, row_start=344, col_end=116):
    """
    Crop the bottom-left corner of an image and save it, named after the recognition result.

    The crop is ``image[row_start:, :col_end]`` and is written to
    ``<corner_out_dir>/<title>_<stem>.jpg``, where ``<stem>`` is the source
    file name without its extension (the file's existing comments describe
    it as the hash value of the original image).

    :param image_path: path of the source image.
    :param corner_out_dir: directory that receives the crop; created when missing.
    :param title: recognition result used as the file-name prefix.
    :param row_start: first row kept by the crop (default 344, the value that
        used to be hard-coded).
    :param col_end: exclusive last column kept by the crop (default 116, the
        value that used to be hard-coded).
    """
    # Stem of the source file name, reused as the suffix of the output name.
    image_hash_value = os.path.basename(os.path.splitext(image_path)[0])
    image = opencv_read_image(image_path)
    # Rows from row_start to the bottom, columns from the left edge to col_end.
    corner = image[row_start:, :col_end]
    # exist_ok=True replaces the exists()/makedirs() pair, which could fail
    # if the directory appeared between the check and the creation.
    os.makedirs(corner_out_dir, exist_ok=True)
    out_path = os.path.join(corner_out_dir, f"{title}_{image_hash_value}.jpg")
    opencv_write_image(out_path, corner)
def save_image_point(image_path, store_point_image_dir, store_single_chinese_dir, point_list):
    """
    Save the whole image under a name that encodes the box of every located character.

    The output name has the form ``<box>_<box>_..._<stem>.jpg``: each ``<box>``
    is the comma-joined first value of one entry of ``point_list``, and
    ``<stem>`` is the source file name without its extension. Before the
    image is written, the per-character crops are saved through
    ``save_single_chinese``.
    """
    # Stem of the source file name, used as the trailing part of the new name.
    stem = os.path.basename(os.path.splitext(image_path)[0])
    picture = opencv_read_image(image_path)

    # Store each located character as its own image first.
    save_single_chinese(picture, store_single_chinese_dir, point_list, image_path)

    # One "a,b,c,d" token per entry, in the order given by point_list.
    box_tokens = []
    for entry in point_list:
        first_box = list(entry.values())[0]
        box_tokens.append(",".join(map(str, first_box)))
    joined_boxes = '_'.join(box_tokens)

    target_path = os.path.join(store_point_image_dir, f"{joined_boxes}_{stem}.jpg")
    opencv_write_image(target_path, picture)
def save_single_chinese(image_obj, store_single_dir, points_list, origin_image_path):
    """
    Crop every located character out of the image and save each crop as its own file.

    Each entry of ``points_list`` is a one-item dict ``{character: box}``.
    The box is used as ``image_obj[box[1]:box[3], box[0]:box[2]]``
    (presumably ``(x1, y1, x2, y2)`` -- confirm against the detector output).
    A crop is written to
    ``<store_single_dir>/<character>/<character>_<stem>.jpg``, where
    ``<stem>`` is the source file name without its extension.

    Entries with a missing character or box, and boxes that produce an empty
    crop, are logged and skipped; the remaining entries are still processed.

    :param image_obj: image array that supports 2-D slicing.
    :param store_single_dir: root directory for the per-character folders.
    :param points_list: list of ``{character: box}`` dicts.
    :param origin_image_path: path of the source image (used for naming and logs).
    """
    # The stem does not depend on the entry, so compute it once.
    image_hash_value = os.path.basename(os.path.splitext(origin_image_path)[0])
    for points in points_list:
        single_chinese = list(points.keys())[0]
        point = list(points.values())[0]
        if not (point and single_chinese):
            logger.error(f"汉字无法和坐标点想匹配: {origin_image_path}")
            continue
        single_point_image = image_obj[point[1]:point[3], point[0]:point[2]]
        # The previous `is []` test compared identity with a brand-new list
        # and was always False; an empty crop is detected by its element count.
        if single_point_image.size == 0:
            logger.error(f"坐标点异常: {origin_image_path}")
            # Skip only this box so the other characters are still saved.
            continue
        image_path = os.path.join(store_single_dir, single_chinese, f'{single_chinese}_{image_hash_value}.jpg')
        opencv_write_image(image_path, single_point_image)
def get_image_infos(image_path):
    """
    Locate the title and the target characters of an image with the external model.

    ``run_click`` is called on the image and its records are read through
    ``.get``: a record whose ``"classes"`` is ``"title"`` supplies the title
    text via ``"content"``; a record whose ``"classes"`` is ``"target"``
    maps its ``"content"`` (a character) to its ``"crop"`` (the box).

    :param image_path: path of the image to analyse.
    :return: ``{"title": title, "points": [{character: box}, ...]}`` with one
        entry per character of the title, in title order. A character with
        no matching target gets an empty list as its box. When no title was
        found, ``"points"`` is an empty list and ``"title"`` is whatever was
        detected (``None`` or an empty string).
    """
    title = None
    point_infos = dict()
    infos = run_click(image_path)
    print(infos)
    for info in infos:
        if info.get("classes") == "title":
            title = info.get("content")
        elif info.get("classes") == "target":
            point_infos[info.get("content")] = info.get("crop")
    if not title:
        # Iterating a None title used to raise an opaque TypeError; report
        # the image instead and hand back an empty result.
        logger.error(f"未识别到标题: {image_path}")
        return {"title": title, "points": []}
    # Order the boxes by the position of each character in the title.
    sort_points = [{single_chinese: point_infos.get(single_chinese, [])} for single_chinese in title]
    return {"title": title, "points": sort_points}
def make_dateset(origin_image_path):
    """
    Cut every image file of a directory into dataset samples.

    For each regular file directly inside ``origin_image_path`` the title
    and character boxes are obtained with ``get_image_infos``; the corner
    crop is then saved to ``CORNER_IMAGE_DIR`` and the annotated image and
    single-character crops to ``POINT_IMAGE_DIR`` and
    ``SINGLE_CHINESE_IMAGE_DIR``.

    Sub-directories and other non-file entries are ignored, and an image for
    which no title was recognised is logged and skipped.

    :param origin_image_path: directory that holds the original images.
    """
    # The with-block closes the scandir iterator even if processing fails.
    with os.scandir(origin_image_path) as entries:
        for entry in entries:
            # Only regular files can be images; skip folders and the like.
            if not entry.is_file():
                continue
            file_path = entry.path
            infos = get_image_infos(file_path)
            if not infos["title"]:
                # Without a title the output names would carry no label.
                logger.error(f"未识别到标题, 跳过: {file_path}")
                continue
            save_image_corner(file_path, CORNER_IMAGE_DIR, infos["title"])
            save_image_point(file_path, POINT_IMAGE_DIR, SINGLE_CHINESE_IMAGE_DIR, infos["points"])
            logger.info(f'完成图片的切割: {file_path}')
if __name__ == "__main__":
    # Build the dataset from the directory that holds the original images.
    # To inspect a single image instead, call get_image_infos(<image path>)
    # and print its result.
    make_dateset(r'D:\极验文字点选原始图片')