descriptor.py
from sklearn.preprocessing import scale
from collections import Counter
from scipy import ndimage as filters  # scipy.ndimage.filters is deprecated; gaussian_filter lives in scipy.ndimage
import numpy as np
import cv2
def map_depth_to_pc(depth, Kmat, MM_PER_M=1000.0):
cx = Kmat[0][2]
cy = Kmat[1][2]
fx = Kmat[0][0]
    fy = Kmat[1][1]
depth = np.double(depth) # 480*640
h, w = depth.shape
    # mask of pixels with a valid (non-zero) depth measurement
    depthMask = depth > 0.0
    # convert the depth image to a 3D point cloud via the pinhole model
    xgrid = np.ones([h, 1]) * np.arange(w) - cx  # [h, w], column index minus cx
    ygrid = np.arange(h).reshape([h, 1]) * np.ones([1, w]) - cy  # [h, w], row index minus cy
pc = np.zeros([h, w, 3])
pc[:, :, 0] = xgrid*depth/fx/MM_PER_M
pc[:, :, 1] = ygrid*depth/fy/MM_PER_M
pc[:, :, 2] = depth/MM_PER_M
return pc, depthMask
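
# A minimal sketch of how map_depth_to_pc can be exercised on synthetic data.
# _demo_map_depth_to_pc is a hypothetical helper, not part of the original module;
# the 3x3 intrinsics (fx, fy, cx, cy) and the constant-depth image below are
# illustrative assumptions, not values from the dataset. It is only defined here
# and never called on import.
def _demo_map_depth_to_pc():
    Kdemo = [[525.0, 0.0, 319.5],
             [0.0, 525.0, 239.5],
             [0.0, 0.0, 1.0]]
    depth_demo = np.full((480, 640), 1000, dtype=np.uint16)  # 1 m everywhere, in millimetres
    pc_demo, mask_demo = map_depth_to_pc(depth_demo, Kdemo)
    # every valid pixel should back-project to z = 1.0 m
    assert mask_demo.all() and np.allclose(pc_demo[:, :, 2], 1.0)
    return pc_demo
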
def fit_plane_for_pc(data):
    '''
    Fit a plane to a set of 3D points.
    For a list of points in (x, y, z) cartesian coordinates, find the plane that
    best fits the data and return (1, n), where n is the unit normal of that plane;
    return (0, 0) if none of the three least-squares fits can be solved.
    '''
flag = np.zeros(3)
datadim = [[] for _ in range(3)]
n = [[] for _ in range(3)]
    data = np.asarray(data, dtype=np.float64)
    for i in range(3):
        datadim[i] = data[:, i]
        # regress the i-th coordinate on the other two by replacing column i with ones
        X = data.copy()
        X[:, i] = 1.0
        # check whether the normal equations can be solved
        X_m = np.dot(np.transpose(X), X)  # X' * X, 3*3
        if np.linalg.det(X_m) == 0:  # X_m is singular, this fit cannot be solved
            flag[i] = 0
            continue
        else:
            flag[i] = 1
        # Construct and normalize the normal vector
        a = np.dot(np.linalg.pinv(X_m), np.transpose(X))
        coeff = np.dot(a, datadim[i])
        c_neg = -coeff
        c_neg[i] = 1
        coeff[i] = 1
        n[i] = c_neg / np.linalg.norm(coeff)  # unit normal candidate for fit i
if sum(flag) == 0:
return 0, 0
else:
# Calculating residuals for each fit
        off_center = [datadim[0] - np.mean(datadim[0]),
                      datadim[1] - np.mean(datadim[1]),
                      datadim[2] - np.mean(datadim[2])]
        residual_sum = np.zeros(3)
        for i in range(3):
            if flag[i] == 0:
                residual_sum[i] = np.inf  # exclude unsolvable fits from the minimum below
                continue
            residuals = np.dot(np.transpose(off_center), n[i])
            residual_sum[i] = np.sum(residuals ** 2)
        # pick the fit with the smallest residual
        best_fit = int(np.argmin(residual_sum))
nor = n[best_fit]
return 1, nor
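
# A minimal sketch of fit_plane_for_pc on synthetic data: sample points from the
# plane z = 0 with small noise and check that the recovered normal is close to
# +/- [0, 0, 1]. _demo_fit_plane is a hypothetical helper with made-up inputs,
# not part of the original module, and is never called on import.
def _demo_fit_plane():
    rng = np.random.default_rng(0)
    xy = rng.uniform(-1.0, 1.0, size=(200, 2))
    z = 0.01 * rng.standard_normal(200)  # nearly planar cloud
    points = np.column_stack([xy, z]).tolist()
    ok, normal = fit_plane_for_pc(points)
    if ok:
        print('estimated normal:', normal)  # expected close to [0, 0, +/-1]
    return ok, normal
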
def get_feature(patch_gray, patch_grad, patch_dp, mask, ngray, ngrad, ndp, ps):
def get_idx(patch, nblock):
patch = np.float64(patch)
patch = cv2.resize(patch, (ps, ps))
        # if the minimum is <= 0, shift all values to be positive; only the relative order matters
        mini = patch.min()
        if mini <= 0:
            trans = max(int(np.ceil(abs(mini))), 1)  # shift by at least 1
            patch += trans
# turn square patch to circle
patch *= mask
# compute blocks' gaps for circle patch
        val = np.unique(patch)
        mini = val[1]  # val[0] is 0 from the masked-out background, so val[1] is the true minimum
        maxi = val[-1]
        gap = ((maxi - mini) / nblock) + 1e-6
# get the idx of values whose mask is True
        divs = np.floor((patch - mini) / gap) + 1  # shift ids to >= 1 to distinguish them from masked-out points
        divs *= mask
        divs = divs.flatten().astype(int).tolist()  # np.int was removed in recent NumPy; use the builtin int
        divs = list(filter(lambda x: x > 0, divs))  # keep only the points inside the circular mask
return divs
# get idx for each patch
idx_gray = get_idx(patch_gray, ngray)
idx_grad = get_idx(patch_grad, ngrad)
idx_dp = get_idx(patch_dp, ndp)
    cat = np.stack([idx_gray, idx_grad, idx_dp], axis=1)
    cat = cat.astype(str).tolist()  # np.str was removed in recent NumPy; stringify for hashing
    cat = [' '.join(x) for x in cat]
    cou = Counter(cat)  # count occurrences of each (gray, grad, dp) bin triple
# build histogram
hist = np.zeros([ngray, ngrad, ndp])
for key in cou.keys():
id1, id2, id3 = key.split(' ')
hist[int(id1) - 1, int(id2) - 1, int(id3) - 1] = cou[key]
desp = hist.flatten()
return desp
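
# A small sketch of the joint-histogram idea used by get_feature, with
# illustrative block counts (2 x 2 x 2 instead of 20 x 20 x 20): each pixel
# inside the circular mask votes into one (gray, gradient, dot-product) bin,
# and the flattened descriptor has ngray * ngrad * ndp entries.
# _demo_get_feature is a hypothetical helper with made-up toy inputs, not part
# of the original module, and is never called on import.
def _demo_get_feature():
    ps_demo = 8
    center = ps_demo // 2
    mask_demo = cv2.circle(np.zeros([ps_demo, ps_demo]), (center, center), center, (1, 1, 1), -1)
    rng = np.random.default_rng(1)
    toy = rng.uniform(1.0, 10.0, size=(ps_demo, ps_demo))
    desp_demo = get_feature(toy, toy, toy, mask_demo, 2, 2, 2, ps_demo)
    assert desp_demo.shape == (2 * 2 * 2,)
    return desp_demo
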
def descriptor(grayImage, depthImage, kps, Kmat, ngray=20, ngrad=20, ndp=20, r=20, ps=40, sr=25, bg_thre=0.3):
'''
:param grayImage: int, [480, 640]
:param depthImage: int, [480, 640]
:param kps: int, [N,2], the coordinates of keypoints
:param Kmat: float, [4,4], the intrinsics matrix of the RGB-D camera
:param ngray: int, the number of gray blocks.
:param ngrad: int, the number of pointcloud gradient blocks
    :param ndp: int, the number of dot-product spatial blocks
    :param r: int, the radius of the initial patch
:param ps: int, the size of final patch. All patches are unified to [ps, ps]
:param sr: int, a radius used to select neighboring points
    :param bg_thre: float, a depth-difference threshold (in metres) used to reject background points among the neighbors
:return: kps: int, [N',2], the coordinates of keypoints
:return: desp: float, [N',D], the feature vectors of keypoints
'''
# the image preprocessing
h, w = grayImage.shape
gray = filters.gaussian_filter(grayImage, 1)
depth = filters.gaussian_filter(depthImage, 1)
pc, depthMask = map_depth_to_pc(depth, Kmat)
# generate a mask to turn square patch to circle patch
center = int(ps // 2)
patchMask = cv2.circle(np.zeros([ps, ps]), (center, center), center, (1, 1, 1), -1)
keypoints = []
features = []
for k in range(len(kps)):
u, v = kps[k]
x, y, z = pc[u, v]
if depthMask[u, v]: # if the depth of this keypoint exists
            # estimate the scale from depth using the empirical equation in BRAND
            rad = r  # per-keypoint patch radius; keep the parameter r unchanged across keypoints
            if 2 < z < 8:
                s = max(0.2, (3.8 - 0.4 * max(2.0, z)) / 3)  # scale factor
                rad = int(round(r * s))  # shrink the patch radius for distant keypoints
# filter distant points to get neighbors
minu = max(u - sr, 0)
maxu = min(u + sr, h)
minv = max(v - sr, 0)
maxv = min(v + sr, w)
patch = pc[minu:maxu, minv:maxv, :]
patch_mask = depthMask[minu:maxu, minv:maxv]
urange, vrange, _ = patch.shape
npoints = urange * vrange
patch_points = patch.reshape([npoints, 3])
neigh_m = abs(patch[:, :, 2] - z) < bg_thre # according to z-axis
dep_m = patch_mask != 0 # depth exists
neigh_m = (neigh_m * dep_m).flatten()
neighbors = [patch_points[i, :] for i in range(npoints) if neigh_m[i]]
if len(neighbors) >= 50: #2 * r**2:
try:
# get the dominant orientation
flag, normal = fit_plane_for_pc(neighbors)
if flag != 0: # if the plane of pc can be fit
n_w = [[normal[0]+x], [normal[1]+y], [normal[2]+z]]
# get the patch to be encoded
                        minu = max(u - rad, 0)
                        maxu = min(u + rad, h)
                        minv = max(v - rad, 0)
                        maxv = min(v + rad, w)
patch_pc = pc[minu:maxu, minv:maxv, :]
## get gray patch
patch_gray = gray[minu:maxu, minv:maxv]
## get spatial dotproduct patch
points = patch_pc.reshape([(maxu-minu)*(maxv-minv), 3])
patch_dp = np.dot(points, n_w)
patch_dp = patch_dp.reshape([maxu-minu, maxv-minv])
## get pc_grad patch
dx, dy, dz = np.gradient(patch_pc)
                        patch_grad = (abs(dx[:, :, 0]) + abs(dx[:, :, 1]) + abs(dx[:, :, 2])
                                      + abs(dy[:, :, 0]) + abs(dy[:, :, 1]) + abs(dy[:, :, 2])
                                      + abs(dz[:, :, 1]) + abs(dz[:, :, 2]))
desp = get_feature(patch_gray, patch_grad, patch_dp, patchMask, ngray, ngrad, ndp, ps)
keypoints.append([u, v])
features.append(desp)
                except Exception:  # skip keypoints whose plane fitting or descriptor extraction fails
                    continue
    feats = scale(features, axis=0, with_mean=True, with_std=True)  # standardize each feature dimension across keypoints
return keypoints, feats
if __name__ == '__main__':
import time
from detector import *
pathdir = './data/ManipulatorsDataset/'
Kmat = np.loadtxt(pathdir + 'camera-intrinsics.txt')
rgbfile = pathdir + 'mixture/illum/rgb_1.png'
depthfile = pathdir + 'mixture/illum/depth_1.png'
rgbImage = cv2.imread(rgbfile)
grayImage = cv2.imread(rgbfile, 0)
depthImage = cv2.imread(depthfile, cv2.IMREAD_UNCHANGED)
kps = detector(grayImage, depthImage, Kmat)
    print('detected keypoints:', len(kps))
time1 = time.time()
kps, desp = descriptor(grayImage, depthImage, kps, Kmat)
    print('descriptor time: %.3f s' % (time.time() - time1))
    print('described keypoints:', len(kps))