-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy patheasy_ocr_test.py
147 lines (120 loc) · 5.62 KB
/
easy_ocr_test.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
import cv2
import numpy as np
from scipy.ndimage import interpolation as inter
import re
import easyocr
import csv
# mouse callback for inputs
# def define_region(event,x,y,flags):
# global ix,iy,drawing
# if event == cv2.EVENT_LBUTTONDOWN:
def correct_skew(image, delta=1, limit=5):
    """Estimate and correct the skew angle of a (binary) image.

    Rotates the image over a range of candidate angles and scores each
    rotation by the summed squared difference of adjacent row sums of the
    projection histogram: text lines aligned with the image rows produce
    the sharpest histogram, hence the highest score.

    Parameters
    ----------
    image : numpy.ndarray
        Input image (typically a binarized grayscale frame).
    delta : int, optional
        Step between candidate angles, in degrees.
    limit : int, optional
        Angles are searched in [-limit, +limit] degrees.

    Returns
    -------
    tuple
        (best_angle, corrected) where corrected is the de-skewed image.
    """
    # FIX: scipy.ndimage.interpolation was deprecated and has been removed
    # in recent SciPy releases; import rotate from scipy.ndimage directly.
    from scipy.ndimage import rotate

    def determine_score(arr, angle):
        # order=0 (nearest neighbour) is enough for scoring and is fast.
        data = rotate(arr, angle, reshape=False, order=0)
        histogram = np.sum(data, axis=1, dtype=float)
        return np.sum((histogram[1:] - histogram[:-1]) ** 2, dtype=float)

    angles = np.arange(-limit, limit + delta, delta)
    scores = [determine_score(image, angle) for angle in angles]
    # argmax returns the first maximum, matching scores.index(max(scores)).
    best_angle = angles[int(np.argmax(scores))]

    (h, w) = image.shape[:2]
    center = (w // 2, h // 2)
    M = cv2.getRotationMatrix2D(center, best_angle, 1.0)
    corrected = cv2.warpAffine(image, M, (w, h), flags=cv2.INTER_CUBIC,
                               borderMode=cv2.BORDER_REPLICATE)
    return best_angle, corrected
def preprocess_image(image):
    """Prepare a colour frame for OCR: grayscale, blur, Otsu binarize, de-skew.

    Parameters
    ----------
    image : numpy.ndarray
        BGR colour frame as returned by cv2.VideoCapture.

    Returns
    -------
    tuple
        (correction_angle, processed_image)
    """
    # Grayscale first — every later step works on a single channel.
    gray = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    # Light Gaussian blur suppresses sensor noise and improves OCR accuracy.
    blurred = cv2.GaussianBlur(gray, (5, 5), 0)
    # Otsu's method picks the binarization threshold automatically.
    _, binary = cv2.threshold(blurred, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    # De-skew so characters sit upright for the recognizer.
    angle, deskewed = correct_skew(binary)
    # Thinning/skeletonization is deliberately skipped: stroke width is constant.
    return angle, deskewed
def format_EtCO2(text):
    """Insert a missing decimal point into a two-digit EtCO2 reading.

    The monitor value is assumed to carry one decimal place, so a bare
    two-digit string such as "45" becomes "4.5". Any other input is
    returned unchanged.
    """
    is_two_digits = re.fullmatch(r'\d{2}', text) is not None
    return f'{text[0]}.{text[1]}' if is_two_digits else text
def extract_text_from_video(video_path, isEtCO2, preprocess):
    """Run EasyOCR over sampled frames of a video and collect recognised numbers.

    Every ``frameSkip``-th frame is (optionally) preprocessed and fed to the
    module-level EasyOCR ``reader``; recognised values are drawn onto the frame
    for visual inspection and stored by timestamp.

    Parameters
    ----------
    video_path : str
        Path of the video file to process.
    isEtCO2 : bool or int
        Truthy to post-process readings with format_EtCO2 (restore the
        missing decimal point).
    preprocess : bool or int
        Truthy to binarize/de-skew each frame before OCR.

    Returns
    -------
    dict
        Mapping of timestamp in seconds -> last value recognised at that
        timestamp (multiple detections within one frame overwrite each other).
    """
    cap = cv2.VideoCapture(video_path)
    detectedData = {}
    firstFrame = True
    frameCnt = 0
    frameSkip = 15  # only decode every 15th frame to keep the loop responsive
    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break
        # Preprocess the frame (binarize + de-skew) when requested.
        if preprocess:
            corrAngle, processed_frame = preprocess_image(frame)
        else:
            processed_frame = frame
            corrAngle = 0
        # Allowlist is restricted to characters that can appear on the monitor.
        data = reader.readtext(processed_frame, batch_size=8,
                               allowlist='./:()0123456789', width_ths=0.5,
                               contrast_ths=0.05, text_threshold=0.9)
        for recognised in data:
            # recognised = (box vertices, text, confidence)
            if isEtCO2:
                recognisedValue = format_EtCO2(recognised[1])
            else:
                recognisedValue = recognised[1]
            boxCoord = np.array(recognised[0], np.int32).reshape((-1, 1, 2))
            labelRecognised = recognisedValue
            labelConfidence = f'{recognised[2]:.3f}'
            # Draw a box around the recognised characters ...
            frame = cv2.polylines(frame, [boxCoord], True, (0, 255, 255))
            # ... the recognised value below the box's first vertex ...
            frame = cv2.putText(frame, labelRecognised,
                                (boxCoord[0][0][0], boxCoord[0][0][1] + 25),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.9, (36, 255, 12), 3)
            # ... and the confidence score beside it.
            frame = cv2.putText(frame, labelConfidence,
                                (boxCoord[0][0][0] - 25, boxCoord[0][0][1]),
                                cv2.FONT_HERSHEY_SIMPLEX, 0.3, (36, 255, 12), 1)
            timestamp = int(cap.get(cv2.CAP_PROP_POS_MSEC)) / 1000.0
            detectedData[timestamp] = recognisedValue
            print(f'Array: {recognised}, Correction Angle: {corrAngle}, Recognised Value: {recognisedValue}')
        if firstFrame:
            firstFrame = False
            # Placeholder state for interactive region-of-interest selection
            # (mouse callback not wired up yet).
            drawing = False
            ix, iy = -1, -1
            events = [i for i in dir(cv2) if 'EVENT' in i]
        cv2.imshow('Number Recognition', frame)
        cv2.imshow('Processed frame', processed_frame)
        # Jump ahead instead of decoding every intermediate frame.
        frameCnt += frameSkip
        cap.set(cv2.CAP_PROP_POS_FRAMES, frameCnt)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
    cap.release()
    # BUG FIX: destroyAllWindows was referenced without parentheses, so it was
    # never actually called and the preview windows stayed open.
    cv2.destroyAllWindows()
    print(detectedData)
    return detectedData
def write_to_csv(path_to_csv, detected_data):
    """Dump the {timestamp: value} detection dict to a two-column CSV file.

    Parameters
    ----------
    path_to_csv : str
        Destination path; the file is overwritten if it already exists.
    detected_data : dict
        Mapping of timestamp (seconds) to the recognised value.
    """
    fieldnames = ['Time (s)'] + ['Detected']
    with open(path_to_csv, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fieldnames)
        writer.writeheader()
        # One row per timestamp, in the dict's insertion order.
        for timestamp, value in detected_data.items():
            writer.writerow({'Time (s)': timestamp, 'Detected': value})
# Module-level EasyOCR reader (English models); consumed as a global inside
# extract_text_from_video.
reader = easyocr.Reader(['en'])
# Absolute path of the recording to analyse — edit per session.
path_to_video = r"C:\Users\erutkovs\OneDrive - University College London\MRes sVNS project\Human trial\human_trial_recordings\data_06012025_pat_14\video\Human 014 060125\014_sVNS_C_1.6mA 1ms 20Hz 30s~3.mp4"
# Run OCR with EtCO2 formatting and frame preprocessing both disabled (0, 0).
detected_data = extract_text_from_video(path_to_video, 0, 0)
#write_to_csv('C:/Users/erutkovs/OneDrive - University College London/MRes sVNS project/Human trial/human_trial_recordings/data_30092024_pat_13/Human 013 300924/013_30092024_P~2.csv', detected_data)