-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathdeformable_convolution.py
181 lines (140 loc) · 7.6 KB
/
deformable_convolution.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
# coding: utf-8
import numpy as np
import tensorflow as tf
class DCN(object):
"""
初始化函数
只有两个参数:
input_shape:输入的input feature map的shape,是一个1*4的list
kernel_size:卷积核的尺寸,是一个1*2的list
"""
def __init__(self, input_shape, kernel_size):
# 定义deform_conv的kernel size
self.kernel_size = kernel_size
self.num_points = kernel_size[0]*kernel_size[1]
# 定义input feature map的shape参数
self.num_batch = input_shape[0]
self.height = input_shape[1]
self.width = input_shape[2]
self.num_channels = input_shape[3]
self.extend_scope = 2.0
"""
函数_coordinate_map(self, offset_field)用于生成3W*3H的coordinate map
输入:offset field
输出:3W*3H的coordinate map
"""
def _coordinate_map(self, offset_field, name):
with tf.variable_scope(name+"/_coordinate_map"):
# offset矩阵
x_offset, y_offset = tf.split(tf.reshape(offset_field, [self.num_batch, self.height, self.width, 2, self.num_points]),2,3)
x_offset = tf.squeeze(x_offset)
y_offset = tf.squeeze(y_offset)
# 中心点坐标矩阵
x_center = tf.reshape(tf.tile(tf.range(self.width),[self.height]),[self.height*self.width,-1])
x_center = tf.tile(x_center,[1,self.num_points])
x_center = tf.reshape(x_center,[self.height,self.width,self.num_points])
x_center = tf.tile(tf.expand_dims(x_center, 0), [self.num_batch,1,1,1])
y_center = tf.tile(tf.range(self.height),[self.width])
y_center = tf.transpose(tf.reshape(y_center, [self.width,self.height]))
y_center = tf.reshape(y_center,[self.height*self.width,-1])
y_center = tf.tile(y_center,[1,self.num_points])
y_center = tf.reshape(y_center,[self.height,self.width,self.num_points])
y_center = tf.tile(tf.expand_dims(y_center, 0), [self.num_batch,1,1,1])
x_center = tf.cast(x_center,"float32")
y_center = tf.cast(y_center,"float32")
# regular grid R矩阵
x = tf.linspace(-(self.kernel_size[0]-1)/2, (self.kernel_size[0]-1)/2, self.kernel_size[0])
y = tf.linspace(-(self.kernel_size[1]-1)/2, (self.kernel_size[1]-1)/2, self.kernel_size[1])
x,y = tf.meshgrid(x,y)
x_spread = tf.transpose(tf.reshape(x,(-1,1)))
y_spread = tf.transpose(tf.reshape(y,(-1,1)))
x_grid = tf.tile(x_spread,[1,self.height*self.width])
x_grid = tf.reshape(x_grid, [self.height, self.width, self.num_points])
y_grid = tf.tile(y_spread,[1,self.height*self.width])
y_grid = tf.reshape(y_grid, [self.height, self.width, self.num_points])
x_grid = tf.tile(tf.expand_dims(x_grid, 0), [self.num_batch,1,1,1])
y_grid = tf.tile(tf.expand_dims(y_grid, 0), [self.num_batch,1,1,1])
# 计算得到X,Y
x = tf.add_n([x_center, x_grid, tf.multiply(self.extend_scope, x_offset)])
y = tf.add_n([y_center, y_grid, tf.multiply(self.extend_scope, y_offset)])
# 将N*H*W*num_points转换为N*3H*3W
x_new = tf.reshape(x,[self.num_batch,self.height,self.width,self.kernel_size[0],self.kernel_size[1]])
x_new = tf.squeeze(tf.split(x_new,self.height,1))
x_new = tf.squeeze(tf.split(x_new,self.kernel_size[0],3))
x_new = tf.reshape(tf.split(x_new,self.height,1),[self.kernel_size[0]*self.height,self.num_batch,self.kernel_size[1]*self.width])
x_new = tf.squeeze(tf.split(x_new,self.num_batch,1))
y_new = tf.reshape(y,[self.num_batch,self.height,self.width,self.kernel_size[0],self.kernel_size[1]])
y_new = tf.squeeze(tf.split(y_new,self.height,1))
y_new = tf.squeeze(tf.split(y_new,self.kernel_size[0],3))
y_new = tf.reshape(tf.split(y_new,self.height,1),[self.kernel_size[0]*self.height,self.num_batch,self.kernel_size[1]*self.width])
y_new = tf.squeeze(tf.split(y_new,self.num_batch,1))
return x_new, y_new
"""
函数_bilinear_interpolate(self, input_feature, coordinate_map)用于进行双线性插值
输入:input feature map;coordinate map
输出:3W*3H*C1的deformed feature map
"""
def _bilinear_interpolate(self, input_feature, x, y, name):
with tf.variable_scope(name+"/_bilinear_interpolate"):
# 一维数据
x = tf.reshape(x, [-1])
y = tf.reshape(y, [-1])
# 数据类型转换
x = tf.cast(x, "float32")
y = tf.cast(y, "float32")
zero = tf.zeros([], dtype="int32")
max_x = tf.cast(self.width-1, "int32")
max_y = tf.cast(self.height-1, "int32")
# 找到四个格点
x0 = tf.cast(tf.floor(x), "int32")
x1 = x0+1
y0 = tf.cast(tf.floor(y), "int32")
y1 = y0+1
# 截断feature map以外的点
x0 = tf.clip_by_value(x0, zero, max_x)
x1 = tf.clip_by_value(x1, zero, max_x)
y0 = tf.clip_by_value(y0, zero, max_y)
y1 = tf.clip_by_value(y1, zero, max_y)
# 把input_feature和coordinate X和Y都转换为二维,方便从中抽取值
input_feature_flat = tf.reshape(input_feature, tf.stack([-1, self.num_channels]))
dimension_2 = self.width
dimension_1 = self.width*self.height
base = tf.range(self.num_batch)*dimension_1
repeat = tf.transpose(tf.expand_dims(tf.ones(shape=(tf.stack([self.num_points*self.height*self.width,]))),1),[1,0])
repeat = tf.cast(repeat,"int32")
base = tf.matmul(tf.reshape(base,(-1,1)),repeat)
base = tf.reshape(base, [-1])
base_y0 = base+y0*dimension_2
base_y1 = base+y1*dimension_2
index_a = base_y0+x0
index_b = base_y1+x0
index_c = base_y0+x1
index_d = base_y1+x1
# 计算四个格点的value
value_a = tf.gather(input_feature_flat, index_a)
value_b = tf.gather(input_feature_flat, index_b)
value_c = tf.gather(input_feature_flat, index_c)
value_d = tf.gather(input_feature_flat, index_d)
# 计算四个面积
x0_float = tf.cast(x0, "float32")
x1_float = tf.cast(x1, "float32")
y0_float = tf.cast(y0, "float32")
y1_float = tf.cast(y1, "float32")
area_a = tf.expand_dims(((x1_float-x)*(y1_float-y)),1)
area_b = tf.expand_dims(((x1_float-x)*(y-y0_float)),1)
area_c = tf.expand_dims(((x-x0_float)*(y1_float-y)),1)
area_d = tf.expand_dims(((x-x0_float)*(y-y0_float)),1)
# 相乘相加
outputs = tf.add_n([value_a*area_a, value_b*area_b, value_c*area_c, value_d*area_d])
outputs = tf.reshape(outputs, [self.num_batch, self.kernel_size[0]*self.height, self.kernel_size[1]*self.width, self.num_channels])
return outputs
"""
函数deform_conv(self, inputs)用于进行deformable convolution操作
输入:input feature map
输出:output feature map
"""
def deform_conv(self, inputs, offset, name, **kwargs):
with tf.variable_scope(name+"/DeformedFeature"):
x, y = self._coordinate_map(offset, name)
deformed_feature = self._bilinear_interpolate(inputs, x, y, name)
return deformed_feature