# convolutionClassifier.py
# Description: this program implements a bag-of-features classifier. Instead
# of classifying an image as a whole, it slides a dynamically-sized "window"
# over the image and classifies each region, much like a convolution filter
# in image processing. The idea is that this can classify multiple objects in
# a single image and also tell where each object lies within it.
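# For a concrete sense of the window sweep implemented below (an illustrative
# assumption, using a 768x768 input image): the window starts at
# height/8 x width/8 = 96x96 and doubles after each full pass over the image
# -- 96x96, 192x192, 384x384, 768x768 -- stopping once it no longer fits
# inside the image.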
import cv2
import numpy as np
import sys
import joblib  # sklearn.externals.joblib was removed in recent scikit-learn versions
from sklearn.cluster import KMeans  # expected type of the loaded bag of features
from sklearn import svm  # expected type of the loaded classifier
from copy import copy
# Given an image, returns its representation as a visual-word histogram
# built from the bag of features.
def getFeaturesHist(img):
    # get the keypoints and their descriptors using SIFT
    kp, des = sift.detectAndCompute(img, None)
    if len(kp) == 0:  # no keypoints detected
        return None
    # build the features histogram from the descriptors using the bag
    features_hist = np.zeros(vocab_size)
    word_vector = bag.predict(np.asarray(des, dtype=float))
    # for each unique visual word, store its number of occurrences
    for word in np.unique(word_vector):
        features_hist[word] = list(word_vector).count(word)
    return features_hist
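# A minimal alternative sketch (not used below): assuming `bag` is a fitted
# KMeans model whose predict() returns integer word indices in
# [0, vocab_size), the same histogram can be built with a single call to
# np.bincount instead of counting each word in a loop.
def getFeaturesHistVectorized(img):
    kp, des = sift.detectAndCompute(img, None)
    if len(kp) == 0:  # no keypoints detected
        return None
    word_vector = bag.predict(np.asarray(des, dtype=float))
    # count every visual word at once; minlength pads unused words with zeros
    return np.bincount(word_vector, minlength=vocab_size).astype(float)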
def convolutionClassifier(img):
    height, width = img.shape[:2]
    # initial window is 1/8 of the image in each dimension
    # (e.g. 96x96 for a 768x768 image)
    k_boundaries = (height // 8, width // 8)
    # iterate through the image taking slices of k_boundaries size and
    # classifying them, doubling the window size after each full pass
    while k_boundaries[0] <= height and k_boundaries[1] <= width:
        for curr_x in range(0, width - k_boundaries[1] + 1, k_boundaries[1]):
            for curr_y in range(0, height - k_boundaries[0] + 1, k_boundaries[0]):
                rgb_img = copy(original_img)
                roi = img[curr_y:curr_y + k_boundaries[0],
                          curr_x:curr_x + k_boundaries[1]]
                cv2.rectangle(rgb_img, (curr_x, curr_y),
                              (curr_x + k_boundaries[1], curr_y + k_boundaries[0]),
                              (0, 0, 255), 1)
                roi_hist = getFeaturesHist(roi)
                if roi_hist is None:  # no keypoints in this window, skip it
                    continue
                label = classifier.predict([roi_hist])
                if label[0] == 'bike':
                    print("I see:", label[0])
                    cv2.imshow("Image", rgb_img)
                    cv2.waitKey(0)
                    cv2.destroyAllWindows()
        k_boundaries = (k_boundaries[0] * 2, k_boundaries[1] * 2)
if len(sys.argv) != 4:
    print("Usage:", sys.argv[0], "<Bag-of-Features> <Classifier> <Image>")
    sys.exit(1)
# retrieve the arguments from the command line
bag = joblib.load(sys.argv[1])
classifier = joblib.load(sys.argv[2])
original_img = cv2.imread(sys.argv[3])
# create the SIFT object
sift = cv2.SIFT_create()
vocab_size = len(set(bag.labels_))  # the total number of visual words in the bag
gray = cv2.cvtColor(original_img, cv2.COLOR_BGR2GRAY)
convolutionClassifier(gray)
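# Example invocation (hypothetical file names, assuming previously trained
# artifacts saved with joblib -- e.g. a fitted KMeans vocabulary and an SVM
# classifier, matching the imports above):
#
#     python convolutionClassifier.py bag.pkl classifier.pkl bikes.jpg
#
# The script then sweeps the image with progressively larger windows and
# shows the image with the current window outlined whenever a region is
# classified as 'bike'.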