-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathgetFeatures.py
361 lines (302 loc) · 12.7 KB
/
getFeatures.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
import os
import h5py
import numpy as np
from VGGNET import VGGNet
from ColourMoments import color_moments
import gc
import cv2
import numpy as np
import os
import joblib
from joblib.numpy_pickle_utils import xrange
from scipy.cluster.vq import *
from sklearn import preprocessing
from colorFeature import ColorDescriptor
from GLCM import *
from ShapeHistogram import *
import time
import hashing
# Wall-clock timestamp recorded before anything else in this module runs;
# it is subtracted from `end` in the 'Running time' report at the bottom of the file.
start =time.time()
# (the code being timed goes between `start` and `end`)
# path = "dataset/"
# print("数据集:")
# print(os.listdir(path))
# Collect every image in the dataset
def getAllPics(lpath="dataset/"):
    """Collect the paths of all ``jpg`` files two folder levels below *lpath*.

    The directory is walked as ``<lpath>/<folder>/<sub-folder>/<file>``.
    Entries at the first two levels that are not directories (stray files
    such as ``.DS_Store`` or a readme) are skipped instead of being passed
    to ``os.listdir``, which would raise ``NotADirectoryError``.

    Args:
        lpath: Root directory to scan. Defaults to ``"dataset/"``.

    Returns:
        A list of path strings built as
        ``os.path.join(lpath, folder + "/", sub_folder + "/", file_name)``,
        in the order ``os.listdir`` yields the entries.

    Raises:
        FileNotFoundError: If *lpath* does not exist.
    """
    image_paths = []
    # First level: one folder per data set.
    for folder in os.listdir(lpath):
        if not os.path.isdir(os.path.join(lpath, folder)):
            continue
        # Second level: the sub-folders of that data set.
        for folder_1 in os.listdir(os.path.join(lpath, folder)):
            sub_dir = os.path.join(lpath, folder + "/", folder_1)
            if not os.path.isdir(sub_dir):
                continue
            # Third level: keep only names ending in 'jpg' (case-sensitive).
            for file_name in os.listdir(sub_dir):
                if file_name.endswith('jpg'):
                    # The explicit "/" suffixes keep forward slashes between
                    # the components regardless of the platform separator.
                    image_paths.append(
                        os.path.join(lpath, folder + "/", folder_1 + "/", file_name)
                    )
    return image_paths
def _image_id(image_path):
    """Return *image_path* from its first "dataset" onwards, or unchanged if absent."""
    marker_at = image_path.find("dataset")
    # str.find returns -1 when the marker is missing; slicing from -1 would
    # keep only the last character of the path, so fall back to the full path.
    return image_path if marker_at < 0 else image_path[marker_at:]


def _write_hash_index(img_list, label, hash_func, csv_path):
    """Hash every image in *img_list* with *hash_func* and write one CSV row each.

    Args:
        img_list: Image file paths to process.
        label: Text inserted into the progress messages ("", "p" or "a").
        hash_func: Callable that receives the array returned by ``cv2.imread``;
            its result is passed through ``hashing.convert_hash``.
        csv_path: Output file; it is truncated and rewritten.
    """
    print("准备提取" + label + "哈希特征描述符......")
    out_dir = os.path.dirname(csv_path)
    if out_dir:
        # Create the output folder on first use instead of failing in open().
        os.makedirs(out_dir, exist_ok=True)
    # The context manager guarantees the index is flushed and closed even if
    # hashing one of the images raises.
    with open(csv_path, "w") as output:
        print("开始提取" + label + "哈希特征描述符......")
        for image_path in img_list:
            print("正在提取图像的哈希特征描述符:" + image_path)
            image = cv2.imread(image_path)
            if image is None:
                # cv2.imread signals an unreadable/corrupt file by returning
                # None rather than raising; skip it and keep indexing.
                print("无法读取图像,已跳过:" + image_path)
                continue
            features = hashing.convert_hash(hash_func(image))
            # Row format: "<imageID>,<str(features)>"
            output.write("%s,%s\n" % (_image_id(image_path), str(features)))
    print(label + "哈希特征描述符完毕")


def mainget(listget=None):
    """Build the dHash, pHash and aHash index files for a list of images.

    Prints progress to stdout and (re)writes ``Feature Library/dhash.csv``,
    ``Feature Library/phash.csv`` and ``Feature Library/ahash.csv``, one
    ``imageID,features`` row per readable image. The imageID is the image
    path starting at its first "dataset" substring (or the whole path when
    that substring is absent).

    Args:
        listget: Image paths to index. When omitted, ``getAllPics()`` is
            called at call time, so importing this module no longer scans
            the disk and the list always reflects the current data set.
    """
    img_list = getAllPics() if listget is None else listget
    print("图片总数量:" + str(len(img_list)) + "张")
    print("--------------------------------------------------")
    print(" 开始提取特征...... ")
    print("--------------------------------------------------")

    # ---- VGG-16 features (disabled; kept for reference) ----
    # features = []
    # names = []
    #
    # model = VGGNet()
    # for i, img_path in enumerate(img_list):
    #     norm_feat = model.get_feat(img_path)
    #     img_name = img_path
    #     features.append(norm_feat)
    #     names.append(img_name)
    #     print("正在提取图像特征:第 %d 张 , 共 %d 张......." % ((i + 1), len(img_list)) + img_name)
    #
    # feats = np.array(features)
    # # file that stores the extracted features
    # output = "index.h5"
    #
    # print("--------------------------------------------------")
    # print(" 正在将提取到的特征数据存储到文件中......")
    # print("--------------------------------------------------")
    #
    # h5f = h5py.File(output, 'w')
    # h5f.create_dataset('dataset_1', data=features)
    # h5f.create_dataset('dataset_2', data=np.string_(names))
    # h5f.close()
    # ---- end of VGG-16 ----

    # Three passes, one per hash, so console output and file order match the
    # sequence dhash -> phash -> ahash.
    _write_hash_index(img_list, "", hashing.dhash, "Feature Library/dhash.csv")
    _write_hash_index(img_list, "p", hashing.pHash, "Feature Library/phash.csv")
    _write_hash_index(img_list, "a", hashing.aHash, "Feature Library/ahash.csv")
# ###################### 开始提取颜色矩描述符 #################################
# print("准备提取颜色特征描述符......")
# # 打开索引文件进行写入,默认为index.csv
# output = open("Feature Library/colormoment.csv", "w")
# print("开始提取图像颜色矩......")
# for image_path in img_list:
# print("正在提取图像的颜色矩:"+image_path)
# # imageID唯一标注图片
# imageID = image_path[image_path.find("dataset"):]
# # print("imageID:"+imageID)
# # image = cv2.imread(image_path)
# # 获取特征描述符,并转为list形式
# features = list(np.array(color_moments(image_path)))
# # 将特征描述符写入索引文件
# # print(features)
# features = [str(f) for f in features]
# # print(features)
# output.write("%s,%s\n" % (imageID, ",".join(features)))
# print("颜色矩描述符完毕")
# ################################# 颜色特征描述符完毕 #############################################
#
#
#
#
# ###################### 开始提取颜色特征描述符 #################################
# print("准备提取颜色特征描述符......")
# # 初始化颜色描述符
# cd = ColorDescriptor((8, 12, 3))
# # 打开索引文件进行写入,默认为index.csv
# output = open("Feature Library/colorhis.csv", "w")
# print("开始提取图像颜色特征描述符......")
# for image_path in img_list:
# print("正在提取图像的颜色特征描述符:"+image_path)
# # imageID唯一标注图片
# imageID = image_path[image_path.find("dataset"):]
# # print("imageID:"+imageID)
# image = cv2.imread(image_path)
# # 获取特征描述符,并转为list形式
# features = list(np.array(cd.describe(image)))
# # 将特征描述符写入索引文件
# # print(features)
# features = [str(f) for f in features]
# # print(features)
# output.write("%s,%s\n" % (imageID, ",".join(features)))
# print("颜色特征描述符完毕")
# ################################# 颜色特征述符完毕 #############################################
# ################################# 纹理特征 #############################################
# print("准备提取纹理特征描述符......")
# # 打开索引文件进行写入,默认为GLCM.csv
# output = open("Feature Library/GLCM.csv", "w+")
# print("开始提取纹理特征描述符......")
# for image_path in img_list:
# print("正在提取图像的纹理特征描述符:"+image_path)
# # imageID唯一标注图片
# imageID = image_path[image_path.find("dataset"):]
# # print("imageID:"+imageID)
# image = cv2.imread(image_path)
# # 获取特征描述符,并转为list形式
# features = list(np.array(getglcm(image)))
# # 将特征描述符写入索引文件
# # print(features)
# features = [str(f) for f in features]
# # print(features)
# output.write("%s,%s\n" % (imageID, ",".join(features)))
# print("纹理特征描述符完毕")
# ################################# 纹理特征 #############################################
# ################################# 边缘特征 #############################################
# print("准备提取边缘特征描述符......")
# # 打开索引文件进行写入,默认为GLCM.csv
# output = open("Feature Library/ShapeHis.csv", "w+")
# print("开始提取边缘特征描述符......")
# for image_path in img_list:
# print("正在提取图像的边缘特征描述符:"+image_path)
# # imageID唯一标注图片
# imageID = image_path[image_path.find("dataset"):]
# # print("imageID:"+imageID)
# image = cv2.imread(image_path)
# # 获取特征描述符,并转为list形式
# features = list(np.array(ft(image)))
# # 将特征描述符写入索引文件
# # print(features)
# features = [str(f) for f in features]
# # print(features)
# output.write("%s,%s\n" % (imageID, ",".join(features)))
# print("边缘特征描述符完毕")
# ################################# 边缘特征 #############################################
#
#
#
#
#
#
#
#
#
# ############################# 准备开始提取所有图片的sift特征 #####################################
#
# print("准备提取所有图片的sift特征......")
#
# # 设置聚类中心数
# numWords = 64
#
# # 创建特征提取和关键点检测器对象
# sift_det=cv2.SIFT_create()
#
# # 列出所有描述符的存储位置
# des_list=[] # 特征描述
#
# print("开始提取图像sift特征描述符......")
# s=0
# for image_path in img_list:
# print("正在提取图像的sift特征描述符:"+image_path)
# # 读取图片文件
# img = cv2.imread(image_path)
# # 将图像转换为灰度图
# gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
# # 检测关键点并计算描述符
# kp, des = sift_det.detectAndCompute(gray, None)
# des_list.append((image_path, des))
#
#
# # 将所有描述符垂直堆叠在一个 numpy 数组中
# descriptors = des_list[0][1]
# print('生成向量数组中......')
# count=1
# for image_path, descriptor in des_list[1:]:
# print(count)
# count+=1
# descriptors = np.vstack((descriptors, descriptor))
#
# # 执行 k-means clustering
# print ("开始 k-means 聚类: %d words, %d key points" %(numWords, descriptors.shape[0]))
# voc, variance = kmeans(descriptors, numWords, 1)
#
# # 计算特征的直方图
# print("计算特征直方图中......")
# im_features = np.zeros((len(img_list), numWords), "float32")
# # print(len(image_paths))
# # for i in range(len(image_paths)):
# for i in range(len(img_list)):
# words, distance = vq(des_list[i][1],voc)
# print(i)
# for w in words:
# im_features[i][w] += 1
#
# # 执行 Tf-Idf 矢量化
# print("进行Tf-Idf 矢量化中......")
# nbr_occurences = np.sum( (im_features > 0) * 1, axis = 0)
# idf = np.array(np.log((1.0*len(img_list)+1) / (1.0*nbr_occurences + 1)), 'float32')
#
# # Perform L2 normalization
# # 执行 L2 规范化
# print("正在进行归一化处理......")
# im_features = im_features*idf
# im_features = preprocessing.normalize(im_features, norm='l2')
#
# print('保存词袋模型文件中.......')
# joblib.dump((im_features, img_list, idf, numWords, voc), "Feature Library/bow.pkl", compress=3)
#
# print("sift特征提取完毕!")
#
# ################################# sift特征提取结束 #############################################
#
# print("特征描述符提取完毕!")
# Report the wall-clock seconds elapsed since `start` was recorded near the top of the file.
# NOTE(review): if this runs at module level (as it appears to), it measures the time
# taken to import/define this module, not the duration of a mainget() call — confirm intent.
end = time.time()
print('Running time: %s Seconds'%(end-start))