-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathhandle_models.py
109 lines (91 loc) · 3.42 KB
/
handle_models.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import cv2
import numpy as np
def handle_pose(output, input_shape):
    '''
    Handles the output of the Pose Estimation model.

    Extracts ONLY the keypoint heatmaps (the 'Mconv7_stage2_L2' blob),
    ignoring the Part Affinity Fields, and resizes each heatmap back up
    to the spatial size of the original input image.

    :param output: dict of blob name -> np.ndarray from the network;
        the heatmap blob is assumed shaped (1, num_keypoints, H', W')
        — TODO confirm against the deployed model
    :param input_shape: shape of the original input image
        (height, width, ...)
    :return: np.ndarray of shape (num_keypoints, height, width)
    '''
    # Only the second output blob (keypoint heatmaps) is needed
    heatmaps = output['Mconv7_stage2_L2']
    # Pre-allocate the resized output map: one plane per keypoint
    out_heatmap = np.zeros([heatmaps.shape[1], input_shape[0], input_shape[1]])
    # Resize each heatmap; cv2.resize expects (width, height), hence the
    # reversed (h, w) slice. enumerate replaces the range(len(...)) idiom.
    for h, heatmap in enumerate(heatmaps[0]):
        out_heatmap[h] = cv2.resize(heatmap, input_shape[0:2][::-1])
    return out_heatmap
def handle_text(output, input_shape):
    '''
    Handles the output of the Text Detection model.

    Extracts ONLY the text/no-text classification of each pixel (the
    'model/segm_logits/add' blob), ignoring the linkage between pixels
    and their neighbors, and resizes it back to the input image size.

    :param output: dict of blob name -> np.ndarray from the network;
        the classification blob is assumed shaped (1, num_classes, H', W')
        — TODO confirm against the deployed model
    :param input_shape: shape of the original input image
        (height, width, ...)
    :return: np.ndarray of shape (num_classes, height, width)
    '''
    # Only the first output blob (text/no-text classification) is needed
    text_classes = output['model/segm_logits/add']
    # Pre-allocate the resized output map (zeros for consistency with
    # handle_pose; every plane is overwritten below)
    out_text = np.zeros([text_classes.shape[1], input_shape[0], input_shape[1]])
    # cv2.resize expects (width, height), hence the reversed (h, w) slice
    for t, text_map in enumerate(text_classes[0]):
        out_text[t] = cv2.resize(text_map, input_shape[0:2][::-1])
    return out_text
def handle_car(output, input_shape):
    '''
    Handles the output of the Car Metadata model.

    Returns two integers: the argmax of each softmax output.
    The first is for color, and the second for type.

    :param output: dict of blob name -> np.ndarray; the 'color' and
        'type' blobs each hold a softmax distribution
    :param input_shape: unused; kept so every handler shares the same
        signature (see handle_output)
    :return: (color_pred, type_pred) as plain Python ints
    '''
    # Flatten away the singleton batch/spatial dimensions
    color_probs = output['color'].flatten()
    type_probs = output['type'].flatten()
    # Cast to int so callers get the integers the docstring promises,
    # not numpy scalar types
    color_pred = int(np.argmax(color_probs))
    type_pred = int(np.argmax(type_probs))
    return color_pred, type_pred
def handle_dect(output, input_shape):
    '''
    Handles the output of the Age/Gender Recognition model.

    Returns two integers: the argmax of the 'age_conv3' output and of
    the 'prob' (gender softmax) output.

    :param output: dict of blob name -> np.ndarray with keys
        'age_conv3' (age output) and 'prob' (gender softmax)
    :param input_shape: unused; kept so every handler shares the same
        signature (see handle_output)
    :return: (age_pred, gender_pred) as plain Python ints
    '''
    # Flatten away the singleton dimensions
    age = output['age_conv3'].flatten()
    gender = output['prob'].flatten()
    # NOTE(review): if 'age_conv3' is a single-value blob (age/100, as in
    # the Intel age-gender-recognition-retail model), argmax is always 0 —
    # confirm this is the intended handling of the age output.
    # Debug print statements removed; cast to int so callers get plain
    # integers, not numpy scalar types.
    age_pred = int(np.argmax(age))
    gender_pred = int(np.argmax(gender))
    return age_pred, gender_pred
def handle_output(model_type):
    '''
    Return the output-handling function for the given model type.

    Known types are "POSE", "TEXT", "CAR_META" and "DETECTION"; any
    other value yields None.
    '''
    # Guard-clause chain; each branch resolves its handler lazily, so an
    # unknown type never touches the handler names at all.
    if model_type == "POSE":
        return handle_pose
    if model_type == "TEXT":
        return handle_text
    if model_type == "CAR_META":
        return handle_car
    if model_type == "DETECTION":
        return handle_dect
    return None
'''
The below function is carried over from the previous exercise.
You just need to call it appropriately in `app.py` to preprocess
the input image.
'''
def preprocessing(input_image, height, width):
    '''
    Preprocess an image for network input.

    Given an input image, height and width:
    - Resize to width and height
    - Transpose the final "channel" dimension to be first
    - Reshape the image to add a "batch" of 1 at the start

    :param input_image: H x W x C image array (C is typically 3)
    :param height: target height in pixels
    :param width: target width in pixels
    :return: np.ndarray of shape (1, C, height, width)
    '''
    # Work on a copy so the caller's array is never mutated
    image = np.copy(input_image)
    # cv2.resize takes (width, height), not (height, width)
    image = cv2.resize(image, (width, height))
    # Channels-last (H, W, C) -> channels-first (C, H, W)
    image = image.transpose((2, 0, 1))
    # Add the batch dimension; using the actual post-transpose shape
    # instead of a hard-coded 3 generalizes to non-3-channel inputs
    image = image.reshape(1, *image.shape)
    return image