-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathrecognize_dogs.py
134 lines (105 loc) · 3.97 KB
/
recognize_dogs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
from __future__ import print_function
import sys
import cv2
import numpy as np
import matplotlib.pyplot as plt
from matplotlib.pyplot import imshow
from PIL import Image
import chainer
from chainer import cuda
import chainer.functions as F
from chainer.functions import caffe
def cnn_dog(photo_file, idx, categories_dog, func, gpu=-1, verbose=False,
save_csv=True):
in_size = 224
# Constant mean over spatial pixels
mean_image = np.ndarray((3, 256, 256), dtype=np.float32)
mean_image[0] = 104
mean_image[1] = 117
mean_image[2] = 123
cropwidth = 256 - in_size
start = cropwidth // 2
stop = start + in_size
mean_image = mean_image[:, start:stop, start:stop].copy()
target_shape = (256, 256)
output_side_length = 256
def forward(x, t):
y, = func(inputs={'data': x}, outputs=['loss3/classifier'],
disable=['loss1/ave_pool', 'loss2/ave_pool'],
train=False)
return F.softmax_cross_entropy(y, t), F.accuracy(y, t)
def predict(x):
y, = func(inputs={'data': x}, outputs=['loss3/classifier'],
disable=['loss1/ave_pool', 'loss2/ave_pool'],
train=False)
return F.softmax(y)
image = cv2.imread(photo_file)
height, width, depth = image.shape
new_height = output_side_length
new_width = output_side_length
if height > width:
new_height = output_side_length * height / width
else:
new_width = output_side_length * width / height
resized_img = cv2.resize(image, (new_width, new_height))
height_offset = (new_height - output_side_length) / 2
width_offset = (new_width - output_side_length) / 2
image = resized_img[height_offset:height_offset + output_side_length,
width_offset:width_offset + output_side_length]
image = image.transpose(2, 0, 1)
image = image[:, start:stop, start:stop].astype(np.float32)
image -= mean_image
x_batch = np.ndarray(
(1, 3, in_size, in_size), dtype=np.float32)
x_batch[0] = image
if gpu >= 0:
x_batch = cuda.to_gpu(x_batch)
x = chainer.Variable(x_batch, volatile=True)
score = predict(x)
if gpu >= 0:
score = cuda.to_cpu(score.data)
# print(score.data)
if save_csv:
np.savetxt('csvs/%d.csv' % (idx), score.data[0])
sd = np.argsort(score.data[0])[::-1]
# import pdb
# pdb.set_trace()
if verbose:
top_k = 5
prediction = zip(score.data[0].tolist(), categories)
prediction.sort(cmp=lambda x, y: cmp(x[0], y[0]), reverse=True)
# name_score = []
for rank, (score, name) in enumerate(prediction[:top_k], start=1):
print('#%d | %s | %4.1f%%' % (rank, name, score * 100))
# name_score.append([name, score])
# check if the top 20 labels have dog-relate ones
top_k = 5
for lb in sd[:top_k]:
if lb in categories_dog:
return True
break
return False
def recog_dogs(image):
imsize = np.array(image).shape[:-1]
k = 0
for i in range(imsize[1]/175):
for j in range(imsize[0]/175):
k+=1
image_c = image.crop([175*i, 175*j, 175*(i+1), 175*(j+1)])
image_c.save('%d%d.png' %(i,j))
imshow(np.asarray(image_c))
plt.show()
cnn_dog('%d%d.png' %(i,j), k, categories_dog, func, verbose=True)
###############################################################################
# initialization of the caffe model
categories = np.loadtxt("labels_dog.txt", str, delimiter="\t")
categories_dog = [i for i in range(
len(categories)) if 'dog' in categories[i]][:-2]
model = 'bvlc_googlenet.caffemodel'
gpu = -1
print('Loading Caffe model file %s...' % model, file=sys.stderr)
try:
func
except NameError:
func = caffe.CaffeFunction(model)
print('Loaded', file=sys.stderr)