-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathdream.py
261 lines (213 loc) · 8.58 KB
/
dream.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
#!/usr/bin/env python
from __future__ import print_function
from argparse import ArgumentParser
from functools import partial
import PIL.Image
import numpy as np
import tensorflow as tf
# The pretrained Inception graph is not shipped with this script. Fetch it with:
#   wget https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip && unzip inception5h.zip
# NOTE(review): model_fn below expects the .pb file under models/ -- confirm
# the archive is unpacked there.
model_fn = 'models/tensorflow_inception_graph.pb'

# Create a dedicated graph, open a session on it and parse the serialized
# GraphDef from disk.
graph = tf.Graph()
sess = tf.InteractiveSession(graph=graph)
with tf.gfile.FastGFile(model_fn, 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())

# Input placeholder; imagenet_mean is subtracted and a leading axis is added
# before the result is wired in as the imported graph's 'input'.
t_input = tf.placeholder(np.float32, name='input')
imagenet_mean = 117.0
t_preprocessed = tf.expand_dims(t_input - imagenet_mean, 0)
tf.import_graph_def(graph_def, input_map={'input': t_preprocessed})

# Names of all imported Conv2D ops and the size of the last axis of each
# op's first output.
layers = [
    op.name
    for op in graph.get_operations()
    if 'import/' in op.name and op.type == 'Conv2D'
]
feature_nums = [
    int(graph.get_tensor_by_name('%s:0' % name).get_shape()[-1])
    for name in layers
]
print('Number of layers:', len(layers))
print('Total number of feature channels:', sum(feature_nums))

# Default start image: uniform noise in [0, 1) offset by 100.0.
img_noise = np.random.uniform(size=(224, 224, 3)) + 100.0
def savearray(a, filename, fmt='jpeg'):
    '''Save an image array under the result/ directory.

    a        -- array of pixel values; clipped to [0, 1], scaled by 255 and
                converted to uint8 before saving.
    filename -- name appended to 'result/' to form the output path.
    fmt      -- PIL format name; replaced by 'png' when filename ends with
                '.png' (case-insensitive).

    The output directory is created if it does not exist yet.
    '''
    import os  # local import so this function does not rely on file-level edits

    # Decide by the actual extension, not by '.png' appearing anywhere in the
    # name (e.g. 'a.png.jpg' must stay a JPEG).
    if filename.lower().endswith('.png'):
        fmt = 'png'

    path = 'result/' + filename
    out_dir = os.path.dirname(path)
    # isdir/makedirs instead of exist_ok= keeps this Python 2 compatible.
    if not os.path.isdir(out_dir):
        os.makedirs(out_dir)

    pixels = np.uint8(np.clip(a, 0, 1) * 255)
    PIL.Image.fromarray(pixels).save(path, fmt)
def T(layer):
    '''Look up output 0 of the imported op called `layer`.

    The name is resolved inside the 'import/' scope of the module-level graph.
    '''
    tensor_name = "import/%s:0" % layer
    return graph.get_tensor_by_name(tensor_name)
def tffunc(*argtypes):
    '''Turn a TF-graph building function into an ordinary callable.

    One placeholder is created per entry of `argtypes`. The returned
    decorator calls the wrapped function once on those placeholders to build
    the output tensor; the resulting wrapper feeds its positional arguments
    to the placeholders and evaluates that tensor. An optional `session`
    keyword is forwarded to `eval` (None when absent).
    See the "resize" function below for an example.
    '''
    inputs = [tf.placeholder(dtype) for dtype in argtypes]

    def wrap(f):
        out = f(*inputs)

        def wrapper(*args, **kw):
            feed = dict(zip(inputs, args))
            return out.eval(feed, session=kw.get('session'))

        return wrapper

    return wrap
@tffunc(np.float32, np.int32)
def resize(img, size):
    '''Bilinearly resize a single image to `size` using TF.

    A leading axis is added for tf.image.resize_bilinear and stripped again
    from the result. Through tffunc the module-level name `resize` becomes a
    regular callable taking (img, size) values.
    '''
    batched = tf.expand_dims(img, 0)
    resized = tf.image.resize_bilinear(batched, size)
    return resized[0, :, :, :]
def calc_grad_tiled(img, t_grad, tile_size=512):
    '''Evaluate tensor t_grad over img one tile at a time.

    The image is rolled by a random offset along both axes before tiling and
    the assembled gradient is rolled back afterwards, so tile seams land in
    different places on every call. Tile origins stop before
    max(dim - tile_size // 2, tile_size); any area past the last tile keeps
    a zero gradient.
    '''
    height, width = img.shape[:2]
    shift_x, shift_y = np.random.randint(tile_size, size=2)
    shifted = np.roll(np.roll(img, shift_x, 1), shift_y, 0)
    grad = np.zeros_like(img)

    row_stop = max(height - tile_size // 2, tile_size)
    col_stop = max(width - tile_size // 2, tile_size)
    for top in range(0, row_stop, tile_size):
        rows = slice(top, top + tile_size)
        for left in range(0, col_stop, tile_size):
            cols = slice(left, left + tile_size)
            tile = shifted[rows, cols]
            grad[rows, cols] = sess.run(t_grad, {t_input: tile})

    # Undo the random shift so the gradient lines up with the input image.
    return np.roll(np.roll(grad, -shift_x, 1), -shift_y, 0)
# 5-tap weights [1, 4, 6, 4, 1]; their outer product gives the 5x5 kernel k.
k = np.float32([1, 4, 6, 4, 1])
k = k[:, None] * k[None, :]
# Normalize k to unit sum and place it on the diagonal of a 3x3 channel
# matrix, giving a (5, 5, 3, 3) filter with no cross-channel terms.
k5x5 = (k / k.sum())[:, :, None, None] * np.eye(3, dtype=np.float32)
def lap_split(img):
    '''Split img into a low-frequency part and a high-frequency residual.

    Returns (lo, hi): lo is img convolved with k5x5 at stride 2; hi is img
    minus lo upsampled back to img's shape with a transposed convolution.
    '''
    with tf.name_scope('split'):
        strides = [1, 2, 2, 1]
        lo = tf.nn.conv2d(img, k5x5, strides, 'SAME')
        upsampled = tf.nn.conv2d_transpose(lo, k5x5 * 4, tf.shape(img),
                                           strides)
        hi = img - upsampled
        return lo, hi
def lap_split_n(img, n):
    '''Build a Laplacian pyramid by applying lap_split n times.

    Returns a list starting with the final low-frequency image, followed by
    the high-frequency residuals from the last split back to the first.
    '''
    residuals = []
    low = img
    for _ in range(n):
        low, high = lap_split(low)
        residuals.append(high)
    return [low] + residuals[::-1]
def lap_merge(levels):
    '''Collapse a Laplacian pyramid back into a single image.

    Starting from levels[0], each following level is added after the running
    image has been upsampled to that level's shape.
    '''
    merged, details = levels[0], levels[1:]
    for hi in details:
        with tf.name_scope('merge'):
            upsampled = tf.nn.conv2d_transpose(merged, k5x5 * 4,
                                               tf.shape(hi), [1, 2, 2, 1])
            merged = upsampled + hi
    return merged
def normalize_std(img, eps=1e-10):
    '''Scale img so that its root-mean-square value becomes 1.0.

    The divisor is sqrt(mean(img ** 2)) (no mean subtraction), floored at
    eps to avoid dividing by zero.
    '''
    with tf.name_scope('normalize'):
        rms = tf.sqrt(tf.reduce_mean(tf.square(img)))
        divisor = tf.maximum(rms, eps)
        return img / divisor
def lap_normalize(img, scale_n=4):
    '''Normalize every level of img's Laplacian pyramid and merge them back.

    A leading axis is added before splitting into scale_n levels and removed
    from the merged result.
    '''
    batched = tf.expand_dims(img, 0)
    normalized = [normalize_std(level)
                  for level in lap_split_n(batched, scale_n)]
    merged = lap_merge(normalized)
    return merged[0, :, :, :]
def render_deepdream(t_obj, img0=img_noise, filename='out.jpg',
                     iter_n=10, step=1.5, octave_n=4, octave_scale=1.4):
    '''Run multi-octave gradient ascent on t_obj and save the result.

    t_obj        -- tensor whose mean is maximized.
    img0         -- start image array; it is never modified.
    filename     -- output name handed to savearray.
    iter_n       -- gradient steps per octave.
    step         -- step size applied to the mean-abs-normalized gradient.
    octave_n     -- number of octaves (scales) to process.
    octave_scale -- size ratio between consecutive octaves.
    '''
    # Optimization objective and its gradient with respect to the input.
    t_score = tf.reduce_mean(t_obj)
    t_grad = tf.gradients(t_score, t_input)[0]

    # Work on a private copy: with octave_n <= 1 the split loop below never
    # rebinds img, so the in-place "+=" would otherwise write into the
    # caller's array (or the shared default img_noise).
    img = np.array(img0)

    # Split the image into octaves: keep the detail lost at each downscale.
    octaves = []
    for _ in range(octave_n - 1):
        hw = img.shape[:2]
        lo = resize(img, np.int32(np.float32(hw) / octave_scale))
        hi = img - resize(lo, hw)
        img = lo
        octaves.append(hi)

    # Generate details octave by octave, from the smallest scale upwards.
    for octave in range(octave_n):
        print('Octave ', octave + 1, end=' ')
        if octave > 0:
            # Upscale and restore the detail stored for this octave.
            hi = octaves[-octave]
            img = resize(img, hi.shape[:2]) + hi
        for _ in range(iter_n):
            g = calc_grad_tiled(img, t_grad)
            # 1e-7 guards against division by zero for an all-zero gradient.
            img += g * (step / (np.abs(g).mean() + 1e-7))
            print('.', end='')
        print('')

    # savearray expects values in [0, 1].
    savearray(img / 255.0, filename)
    print(filename, 'saved')
def render_lap_deepdream(t_obj, img0=img_noise, filename='out.jpg', lap_n=4,
                         iter_n=10, step=1.5, octave_n=4, octave_scale=1.4):
    '''Run multi-octave gradient ascent with Laplacian-normalized gradients.

    t_obj        -- tensor whose mean is maximized.
    img0         -- start image array; it is never modified.
    filename     -- output name handed to savearray.
    lap_n        -- number of Laplacian pyramid splits used to normalize
                    each gradient.
    iter_n       -- gradient steps per octave.
    step         -- step size applied to the normalized gradient.
    octave_n     -- number of octaves (scales) to process.
    octave_scale -- size ratio between consecutive octaves.
    '''
    # Optimization objective and its gradient with respect to the input.
    t_score = tf.reduce_mean(t_obj)
    t_grad = tf.gradients(t_score, t_input)[0]
    # Build the Laplacian normalization graph once, as a plain callable.
    lap_norm_func = tffunc(np.float32)(partial(lap_normalize, scale_n=lap_n))

    # Work on a private copy: with octave_n <= 1 the split loop below never
    # rebinds img, so the in-place "+=" would otherwise write into the
    # caller's array (or the shared default img_noise).
    img = np.array(img0)

    # Split the image into octaves: keep the detail lost at each downscale.
    octaves = []
    for _ in range(octave_n - 1):
        hw = img.shape[:2]
        lo = resize(img, np.int32(np.float32(hw) / octave_scale))
        hi = img - resize(lo, hw)
        img = lo
        octaves.append(hi)

    # Generate details octave by octave, from the smallest scale upwards.
    for octave in range(octave_n):
        print('Octave ', octave + 1, end=' ')
        if octave > 0:
            # Upscale and restore the detail stored for this octave.
            hi = octaves[-octave]
            img = resize(img, hi.shape[:2]) + hi
        for _ in range(iter_n):
            g = calc_grad_tiled(img, t_grad)
            g = lap_norm_func(g)
            # The Laplacian-normalized gradient is additionally rescaled by
            # its mean absolute value; 1e-7 guards against division by zero.
            img += g * (step / (np.abs(g).mean() + 1e-7))
            print('.', end='')
        print('')

    # savearray expects values in [0, 1].
    savearray(img / 255.0, filename)
    print(filename, 'saved')
def main():
    '''Parse the command line, load the source image and render a dream.

    The objective is the square of the selected layer's output. The
    Laplacian-normalized renderer is used when --lap is positive, the plain
    renderer otherwise.
    '''
    parser = ArgumentParser()
    parser.add_argument('-l', '--layer',
                        default='mixed5b_3x3_bottleneck_pre_relu',
                        help='layer name')
    parser.add_argument('-i', '--image',
                        default='pilatus800.jpg',
                        help='source image')
    parser.add_argument('-o', '--output',
                        default='out.jpg',
                        help='output file')
    parser.add_argument('--lap', dest='lap_n', type=int, default=4,
                        help='lap_n (0 for not using Laplacian pyramid)')
    parser.add_argument('--iter', dest='iter_n', type=int, default=10,
                        help='iter_n')
    parser.add_argument('--step', dest='step', type=float, default=1.5,
                        help='step')
    parser.add_argument('--octave', dest='octave_n', type=int, default=4,
                        help='octave_n')
    parser.add_argument('--scale', dest='octave_scale', type=float,
                        default=1.4, help='octave_scale')
    args = parser.parse_args()

    # convert('RGB') avoids problems with RGBA PNG input.
    source = PIL.Image.open(args.image).convert('RGB')
    img0 = np.float32(source)

    objective = tf.square(T(args.layer))
    shared = dict(img0=img0, filename=args.output,
                  iter_n=args.iter_n, step=args.step,
                  octave_n=args.octave_n,
                  octave_scale=args.octave_scale)
    if args.lap_n > 0:
        render_lap_deepdream(objective, lap_n=args.lap_n, **shared)
    else:
        render_deepdream(objective, **shared)


if __name__ == '__main__':
    main()