-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathprocessing.py
136 lines (84 loc) · 3.6 KB
/
processing.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
from extract_document.bank_cheque import *
from extract_document.pancard import *
from extract_document.face import *
from extract_document.driving_licence import *
from extract_document.adhar_card import *
from utils.utils import *
import cv2
import logging
logger = logging.getLogger("main")
def text_detection(img_path, doc_type):
"""
:param img_path: input image_path for processing
:param doc_type: document type
:return: image with extracted text
"""
img = cv2.imread(img_path)
# document is bank_cheque
if doc_type == "Bank_Cheque":
cheque_details = {}
# get details from the image
cheque_details["ACC_No."] = ensemble_acc_output(img_path)
cheque_details["IFSC"] = ensemble_ifsc_output(img_path)
cheque_details["MICR"] = get_micrcode(img_path)
logger.debug("[INFO] Cheque details extracted: {}".format(cheque_details))
return img, cheque_details
elif doc_type == "Pancard":
# get details from the image
pan_details = get_pan_number(img_path)
# detect face in the document
isface, image = detect_faces(img_path)
if isface:
pan_details["Face_Found"] = "True"
else:
pan_details["Face_Found"] = "False"
logger.debug("[INFO] Pancard Clean Text Extracted: {}".format(pan_details))
return image, pan_details
elif doc_type == "Driving_Licence":
# recognize raw text first
raw_text = recognise_text(img_path)
logger.debug("[INFO] Driving Licence Raw Text `{}`".format(raw_text))
# extract labels from the recognised text according to the image_type
license_details = {idx: text for idx, text in enumerate(raw_text)}
license_details = get_licence_text(license_details)
# detect face in the document
isface, image = detect_faces(img_path)
if isface:
license_details["Face_Found"] = "True"
else:
license_details["Face_Found"] = "False"
logger.debug("[INFO] Driving Licence Clean Text Extracted: {}".format(license_details))
return image, license_details
elif doc_type == "Aadhar_Card":
# recognize raw text first
raw_text = recognise_text(img_path)
logger.debug("[INFO] Aadhar Card Raw Text `{}`".format(raw_text))
# extract labels from the recognised text according to the image_type
aadhar_details = get_aadhar_text(raw_text)
# detect face in the document
isface, image = detect_faces(img_path)
if isface:
aadhar_details["Face_Found"] = "True"
else:
aadhar_details["Face_Found"] = "False"
logger.debug("[INFO] Aadhar Card Clean Text Extracted: {}".format(aadhar_details))
return image, aadhar_details
# if the document
elif doc_type == "Others":
# extract all the text
others_details = simple_text_extraction(img_path)
others_details = clean_text(others_details)
others_details = [ i for i in others_details.split() if len(str(i)) >= 4 ]
# detect face in the document
isface, image = detect_faces(img_path)
final_details = dict()
final_details["Extracted Data"] = others_details
if isface:
final_details["Face"] = "Face Found"
else:
final_details["Face"] = "Face Not Found"
logger.debug("[INFO] Other Document Clean Text Extracted: {}".format(final_details))
return image, final_details
else:
logger.debug("[INFO] No document present...")
return img, "No text found"