dream.py

#!/usr/bin/env python

from __future__ import print_function
from argparse import ArgumentParser
from functools import partial
import PIL.Image
import numpy as np
import tensorflow as tf

#!wget https://storage.googleapis.com/download.tensorflow.org/models/inception5h.zip && unzip inception5h.zip

model_fn = 'models/tensorflow_inception_graph.pb'

# creating TensorFlow session and loading the model
graph = tf.Graph()
sess = tf.InteractiveSession(graph=graph)
with tf.gfile.FastGFile(model_fn, 'rb') as f:
    graph_def = tf.GraphDef()
    graph_def.ParseFromString(f.read())
t_input = tf.placeholder(np.float32, name='input')  # define the input tensor
imagenet_mean = 117.0
t_preprocessed = tf.expand_dims(t_input - imagenet_mean, 0)
tf.import_graph_def(graph_def, {'input': t_preprocessed})

layers = [op.name for op in graph.get_operations()
          if op.type == 'Conv2D' and 'import/' in op.name]
feature_nums = [int(graph.get_tensor_by_name(name + ':0').get_shape()[-1])
                for name in layers]
print('Number of layers:', len(layers))
print('Total number of feature channels:', sum(feature_nums))

# start with a gray image with a little noise
img_noise = np.random.uniform(size=(224, 224, 3)) + 100.0


def savearray(a, filename, fmt='jpeg'):
    if '.png' in filename.lower():
        fmt = 'png'
    a = np.uint8(np.clip(a, 0, 1) * 255)
    PIL.Image.fromarray(a).save('result/' + filename, fmt)


def T(layer):
    '''Helper for getting layer output tensor'''
    return graph.get_tensor_by_name("import/%s:0" % layer)


def tffunc(*argtypes):
    '''Helper that transforms TF-graph generating function into a regular one.
    See "resize" function below.
    '''
    placeholders = list(map(tf.placeholder, argtypes))

    def wrap(f):
        out = f(*placeholders)

        def wrapper(*args, **kw):
            return out.eval(dict(zip(placeholders, args)),
                            session=kw.get('session'))
        return wrapper
    return wrap


def resize(img, size):
    '''Helper function that uses TF to resize an image'''
    img = tf.expand_dims(img, 0)
    return tf.image.resize_bilinear(img, size)[0, :, :, :]


resize = tffunc(np.float32, np.int32)(resize)


def calc_grad_tiled(img, t_grad, tile_size=512):
    '''Compute the value of tensor t_grad over the image in a tiled way.
    Random shifts are applied to the image to blur tile boundaries over
    multiple iterations.'''
    sz = tile_size
    h, w = img.shape[:2]
    sx, sy = np.random.randint(sz, size=2)
    img_shift = np.roll(np.roll(img, sx, 1), sy, 0)
    grad = np.zeros_like(img)
    for y in range(0, max(h - sz // 2, sz), sz):
        for x in range(0, max(w - sz // 2, sz), sz):
            sub = img_shift[y:y + sz, x:x + sz]
            g = sess.run(t_grad, {t_input: sub})
            grad[y:y + sz, x:x + sz] = g
    return np.roll(np.roll(grad, -sx, 1), -sy, 0)


k = np.float32([1, 4, 6, 4, 1])
k = np.outer(k, k)
k5x5 = k[:, :, None, None] / k.sum() * np.eye(3, dtype=np.float32)


def lap_split(img):
    '''Split the image into lo and hi frequency components'''
    with tf.name_scope('split'):
        lo = tf.nn.conv2d(img, k5x5, [1, 2, 2, 1], 'SAME')
        lo2 = tf.nn.conv2d_transpose(lo, k5x5 * 4, tf.shape(img), [1, 2, 2, 1])
        hi = img - lo2
    return lo, hi


def lap_split_n(img, n):
    '''Build Laplacian pyramid with n splits'''
    levels = []
    for i in range(n):
        img, hi = lap_split(img)
        levels.append(hi)
    levels.append(img)
    return levels[::-1]


def lap_merge(levels):
    '''Merge Laplacian pyramid'''
    img = levels[0]
    for hi in levels[1:]:
        with tf.name_scope('merge'):
            img = tf.nn.conv2d_transpose(img, k5x5 * 4, tf.shape(hi),
                                         [1, 2, 2, 1]) + hi
    return img


def normalize_std(img, eps=1e-10):
    '''Normalize image by making its standard deviation = 1.0'''
    with tf.name_scope('normalize'):
        std = tf.sqrt(tf.reduce_mean(tf.square(img)))
        return img / tf.maximum(std, eps)


def lap_normalize(img, scale_n=4):
    '''Perform the Laplacian pyramid normalization.'''
    img = tf.expand_dims(img, 0)
    tlevels = lap_split_n(img, scale_n)
    tlevels = list(map(normalize_std, tlevels))
    out = lap_merge(tlevels)
    return out[0, :, :, :]


def render_deepdream(t_obj, img0=img_noise, filename='out.jpg',
                     iter_n=10, step=1.5, octave_n=4, octave_scale=1.4):
    # defining the optimization objective
    t_score = tf.reduce_mean(t_obj)
    # behold the power of automatic differentiation!
    t_grad = tf.gradients(t_score, t_input)[0]

    # split the image into a number of octaves
    img = img0
    octaves = []
    for i in range(octave_n - 1):
        hw = img.shape[:2]
        lo = resize(img, np.int32(np.float32(hw) / octave_scale))
        hi = img - resize(lo, hw)
        img = lo
        octaves.append(hi)

    # generate details octave by octave
    for octave in range(octave_n):
        print('Octave ', octave + 1, end=' ')
        if octave > 0:
            hi = octaves[-octave]
            img = resize(img, hi.shape[:2]) + hi
        for i in range(iter_n):
            g = calc_grad_tiled(img, t_grad)
            img += g * (step / (np.abs(g).mean() + 1e-7))
            print('.', end='')
        print('')

    # save the image
    savearray(img / 255.0, filename)
    print(filename, 'saved')


def render_lap_deepdream(t_obj, img0=img_noise, filename='out.jpg', lap_n=4,
                         iter_n=10, step=1.5, octave_n=4, octave_scale=1.4):
    # defining the optimization objective
    t_score = tf.reduce_mean(t_obj)
    # behold the power of automatic differentiation!
    t_grad = tf.gradients(t_score, t_input)[0]
    # build the laplacian normalization graph
    lap_norm_func = tffunc(np.float32)(partial(lap_normalize, scale_n=lap_n))

    # split the image into a number of octaves
    img = img0
    octaves = []
    for i in range(octave_n - 1):
        hw = img.shape[:2]
        lo = resize(img, np.int32(np.float32(hw) / octave_scale))
        hi = img - resize(lo, hw)
        img = lo
        octaves.append(hi)

    # generate details octave by octave
    for octave in range(octave_n):
        print('Octave ', octave + 1, end=' ')
        if octave > 0:
            hi = octaves[-octave]
            img = resize(img, hi.shape[:2]) + hi
        for i in range(iter_n):
            g = calc_grad_tiled(img, t_grad)
            g = lap_norm_func(g)
            # img += g * step
            img += g * (step / (np.abs(g).mean() + 1e-7))
            print('.', end='')
        print('')

    # save the image
    savearray(img / 255.0, filename)
    print(filename, 'saved')


def main():
    layer_name = 'mixed5b_3x3_bottleneck_pre_relu'
    img0_name = 'pilatus800.jpg'

    parser = ArgumentParser()
    parser.add_argument('-l', '--layer',
                        help='layer name',
                        default=layer_name)
    parser.add_argument('-i', '--image',
                        help='source image',
                        default=img0_name)
    parser.add_argument('-o', '--output',
                        help='output file',
                        default='out.jpg')
    parser.add_argument('--lap', dest='lap_n',
                        help='lap_n (0 for not using Laplacian pyramid)',
                        default=4, type=int)
    parser.add_argument('--iter', dest='iter_n',
                        help='iter_n',
                        default=10, type=int)
    parser.add_argument('--step', dest='step',
                        help='step',
                        default=1.5, type=float)
    parser.add_argument('--octave', dest='octave_n',
                        help='octave_n',
                        default=4, type=int)
    parser.add_argument('--scale', dest='octave_scale',
                        help='octave_scale',
                        default=1.4, type=float)
    args = parser.parse_args()

    # use convert('RGB') to fix png (RGBA) problem
    img0 = PIL.Image.open(args.image).convert('RGB')
    img0 = np.float32(img0)
    if args.lap_n > 0:
        render_lap_deepdream(tf.square(T(args.layer)), lap_n=args.lap_n,
                             img0=img0, filename=args.output,
                             iter_n=args.iter_n, step=args.step,
                             octave_n=args.octave_n,
                             octave_scale=args.octave_scale)
    else:
        render_deepdream(tf.square(T(args.layer)),
                         img0=img0, filename=args.output,
                         iter_n=args.iter_n, step=args.step,
                         octave_n=args.octave_n,
                         octave_scale=args.octave_scale)


if __name__ == '__main__':
    main()