diff --git a/inception_v3.py b/inception_v3.py
index 4fca301..dbb34d1 100644
--- a/inception_v3.py
+++ b/inception_v3.py
@@ -1,160 +1,242 @@
 # -*- coding: utf-8 -*-
-'''Inception V3 model for Keras.
+"""Inception V3 model for Keras.
 
-Note that the ImageNet weights provided are from a model that had not fully converged.
-Inception v3 should be able to reach 6.9% top-5 error, but our model
-only gets to 7.8% (same as a fully-converged ResNet 50).
-For comparison, VGG16 only gets to 9.9%, quite a bit worse.
+Note that the input image format for this model is different than for
+the VGG16 and ResNet models (299x299 instead of 224x224),
+and that the input preprocessing function is also different (same as Xception).
 
-Also, do note that the input image format for this model is different than for
-other models (299x299 instead of 224x224), and that the input preprocessing function
-is also different.
-
-# Reference:
+# Reference
 
 - [Rethinking the Inception Architecture for Computer Vision](http://arxiv.org/abs/1512.00567)
 
-'''
+"""
 from __future__ import print_function
+from __future__ import absolute_import
 
-import numpy as np
 import warnings
+import numpy as np
 
 from keras.models import Model
-from keras.layers import Flatten, Dense, Input, BatchNormalization, merge
-from keras.layers import Convolution2D, MaxPooling2D, AveragePooling2D
-from keras.preprocessing import image
+from keras import layers
+from keras.layers import Activation
+from keras.layers import Dense
+from keras.layers import Input
+from keras.layers import BatchNormalization
+from keras.layers import Conv2D
+from keras.layers import MaxPooling2D
+from keras.layers import AveragePooling2D
+from keras.layers import GlobalAveragePooling2D
+from keras.layers import GlobalMaxPooling2D
+from keras.engine.topology import get_source_inputs
 from keras.utils.layer_utils import convert_all_kernels_in_model
 from keras.utils.data_utils import get_file
 from keras import backend as K
-from imagenet_utils import decode_predictions
+from keras.applications.imagenet_utils import decode_predictions
+from keras.applications.imagenet_utils import _obtain_input_shape
+from keras.preprocessing import image
 
 
-TH_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/inception_v3_weights_th_dim_ordering_th_kernels.h5'
-TF_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/inception_v3_weights_tf_dim_ordering_tf_kernels.h5'
-TH_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/inception_v3_weights_th_dim_ordering_th_kernels_notop.h5'
-TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'
+WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels.h5'
+WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5'
 
 
-def conv2d_bn(x, nb_filter, nb_row, nb_col,
-              border_mode='same', subsample=(1, 1),
+def conv2d_bn(x,
+              filters,
+              num_row,
+              num_col,
+              padding='same',
+              strides=(1, 1),
               name=None):
-    '''Utility function to apply conv + BN.
-    '''
+    """Utility function to apply conv + BN.
+
+    Arguments:
+        x: input tensor.
+        filters: filters in `Conv2D`.
+        num_row: height of the convolution kernel.
+        num_col: width of the convolution kernel.
+        padding: padding mode in `Conv2D`.
+        strides: strides in `Conv2D`.
+        name: name of the ops; will become `name + '_conv'`
+            for the convolution and `name + '_bn'` for the
+            batch norm layer.
+
+    Returns:
+        Output tensor after applying `Conv2D` and `BatchNormalization`.
+    """
     if name is not None:
         bn_name = name + '_bn'
         conv_name = name + '_conv'
     else:
         bn_name = None
         conv_name = None
-    if K.image_dim_ordering() == 'th':
+    if K.image_data_format() == 'channels_first':
         bn_axis = 1
     else:
         bn_axis = 3
-    x = Convolution2D(nb_filter, nb_row, nb_col,
-                      subsample=subsample,
-                      activation='relu',
-                      border_mode=border_mode,
-                      name=conv_name)(x)
-    x = BatchNormalization(axis=bn_axis, name=bn_name)(x)
+    x = Conv2D(
+        filters, (num_row, num_col),
+        strides=strides,
+        padding=padding,
+        use_bias=False,
+        name=conv_name)(x)
+    x = BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x)
+    x = Activation('relu', name=name)(x)
     return x
 
 
-def InceptionV3(include_top=True, weights='imagenet',
-                input_tensor=None):
-    '''Instantiate the Inception v3 architecture,
-    optionally loading weights pre-trained
+def InceptionV3(include_top=True,
+                weights='imagenet',
+                input_tensor=None,
+                input_shape=None,
+                pooling=None,
+                classes=1000):
+    """Instantiates the Inception v3 architecture.
+
+    Optionally loads weights pre-trained
     on ImageNet. Note that when using TensorFlow,
     for best performance you should set
-    `image_dim_ordering="tf"` in your Keras config
+    `image_data_format="channels_last"` in your Keras config
     at ~/.keras/keras.json.
-
     The model and the weights are compatible with both
-    TensorFlow and Theano. The dimension ordering
+    TensorFlow and Theano. The data format
     convention used by the model is the one
     specified in your Keras config file.
-
     Note that the default input image size for this model is 299x299.
 
-    # Arguments
-        include_top: whether to include the 3 fully-connected
-            layers at the top of the network.
+    Arguments:
+        include_top: whether to include the fully-connected
+            layer at the top of the network.
         weights: one of `None` (random initialization)
             or "imagenet" (pre-training on ImageNet).
         input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
             to use as image input for the model.
-
-    # Returns
+        input_shape: optional shape tuple, only to be specified
+            if `include_top` is False (otherwise the input shape
+            has to be `(299, 299, 3)` (with `channels_last` data format)
+            or `(3, 299, 299)` (with `channels_first` data format)).
+            It should have exactly 3 input channels,
+            and width and height should be no smaller than 139.
+            E.g. `(150, 150, 3)` would be one valid value.
+        pooling: Optional pooling mode for feature extraction
+            when `include_top` is `False`.
+            - `None` means that the output of the model will be
+                the 4D tensor output of the
+                last convolutional layer.
+            - `avg` means that global average pooling
+                will be applied to the output of the
+                last convolutional layer, and thus
+                the output of the model will be a 2D tensor.
+            - `max` means that global max pooling will
+                be applied.
+        classes: optional number of classes to classify images
+            into, only to be specified if `include_top` is True, and
+            if no `weights` argument is specified.
+
+    Returns:
         A Keras model instance.
-    '''
+
+    Raises:
+        ValueError: in case of invalid argument for `weights`,
+            or invalid input shape.
+    """
     if weights not in {'imagenet', None}:
         raise ValueError('The `weights` argument should be either '
                          '`None` (random initialization) or `imagenet` '
                          '(pre-training on ImageNet).')
+
+    if weights == 'imagenet' and include_top and classes != 1000:
+        raise ValueError('If using `weights` as imagenet with `include_top`'
+                         ' as true, `classes` should be 1000')
+
     # Determine proper input shape
-    if K.image_dim_ordering() == 'th':
-        if include_top:
-            input_shape = (3, 299, 299)
-        else:
-            input_shape = (3, None, None)
-    else:
-        if include_top:
-            input_shape = (299, 299, 3)
-        else:
-            input_shape = (None, None, 3)
+    input_shape = _obtain_input_shape(
+        input_shape,
+        default_size=299,
+        min_size=139,
+        data_format=K.image_data_format(),
+        include_top=include_top)
 
     if input_tensor is None:
         img_input = Input(shape=input_shape)
     else:
-        if not K.is_keras_tensor(input_tensor):
-            img_input = Input(tensor=input_tensor)
-        else:
-            img_input = input_tensor
+        img_input = Input(tensor=input_tensor, shape=input_shape)
 
-    if K.image_dim_ordering() == 'th':
+    if K.image_data_format() == 'channels_first':
         channel_axis = 1
     else:
         channel_axis = 3
 
-    x = conv2d_bn(img_input, 32, 3, 3, subsample=(2, 2), border_mode='valid')
-    x = conv2d_bn(x, 32, 3, 3, border_mode='valid')
+    x = conv2d_bn(img_input, 32, 3, 3, strides=(2, 2), padding='valid')
+    x = conv2d_bn(x, 32, 3, 3, padding='valid')
     x = conv2d_bn(x, 64, 3, 3)
     x = MaxPooling2D((3, 3), strides=(2, 2))(x)
 
-    x = conv2d_bn(x, 80, 1, 1, border_mode='valid')
-    x = conv2d_bn(x, 192, 3, 3, border_mode='valid')
+    x = conv2d_bn(x, 80, 1, 1, padding='valid')
+    x = conv2d_bn(x, 192, 3, 3, padding='valid')
     x = MaxPooling2D((3, 3), strides=(2, 2))(x)
 
     # mixed 0, 1, 2: 35 x 35 x 256
-    for i in range(3):
-        branch1x1 = conv2d_bn(x, 64, 1, 1)
+    branch1x1 = conv2d_bn(x, 64, 1, 1)
+
+    branch5x5 = conv2d_bn(x, 48, 1, 1)
+    branch5x5 = conv2d_bn(branch5x5, 64, 5, 5)
+
+    branch3x3dbl = conv2d_bn(x, 64, 1, 1)
+    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
+    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
 
-        branch5x5 = conv2d_bn(x, 48, 1, 1)
-        branch5x5 = conv2d_bn(branch5x5, 64, 5, 5)
+    branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x)
+    branch_pool = conv2d_bn(branch_pool, 32, 1, 1)
+    x = layers.concatenate(
+        [branch1x1, branch5x5, branch3x3dbl, branch_pool],
+        axis=channel_axis,
+        name='mixed0')
 
-        branch3x3dbl = conv2d_bn(x, 64, 1, 1)
-        branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
-        branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
+    # mixed 1: 35 x 35 x 288
+    branch1x1 = conv2d_bn(x, 64, 1, 1)
 
-        branch_pool = AveragePooling2D(
-            (3, 3), strides=(1, 1), border_mode='same')(x)
-        branch_pool = conv2d_bn(branch_pool, 32, 1, 1)
-        x = merge([branch1x1, branch5x5, branch3x3dbl, branch_pool],
-                  mode='concat', concat_axis=channel_axis,
-                  name='mixed' + str(i))
+    branch5x5 = conv2d_bn(x, 48, 1, 1)
+    branch5x5 = conv2d_bn(branch5x5, 64, 5, 5)
+
+    branch3x3dbl = conv2d_bn(x, 64, 1, 1)
+    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
+    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
+
+    branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x)
+    branch_pool = conv2d_bn(branch_pool, 64, 1, 1)
+    x = layers.concatenate(
+        [branch1x1, branch5x5, branch3x3dbl, branch_pool],
+        axis=channel_axis,
+        name='mixed1')
+
+    # mixed 2: 35 x 35 x 288
+    branch1x1 = conv2d_bn(x, 64, 1, 1)
+
+    branch5x5 = conv2d_bn(x, 48, 1, 1)
+    branch5x5 = conv2d_bn(branch5x5, 64, 5, 5)
+
+    branch3x3dbl = conv2d_bn(x, 64, 1, 1)
+    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
+    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
+
+    branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x)
+    branch_pool = conv2d_bn(branch_pool, 64, 1, 1)
+    x = layers.concatenate(
+        [branch1x1, branch5x5, branch3x3dbl, branch_pool],
+        axis=channel_axis,
+        name='mixed2')
 
     # mixed 3: 17 x 17 x 768
-    branch3x3 = conv2d_bn(x, 384, 3, 3, subsample=(2, 2), border_mode='valid')
+    branch3x3 = conv2d_bn(x, 384, 3, 3, strides=(2, 2), padding='valid')
 
     branch3x3dbl = conv2d_bn(x, 64, 1, 1)
     branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3)
-    branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3,
-                             subsample=(2, 2), border_mode='valid')
+    branch3x3dbl = conv2d_bn(
+        branch3x3dbl, 96, 3, 3, strides=(2, 2), padding='valid')
 
     branch_pool = MaxPooling2D((3, 3), strides=(2, 2))(x)
-    x = merge([branch3x3, branch3x3dbl, branch_pool],
-              mode='concat', concat_axis=channel_axis,
-              name='mixed3')
+    x = layers.concatenate(
+        [branch3x3, branch3x3dbl, branch_pool], axis=channel_axis, name='mixed3')
 
     # mixed 4: 17 x 17 x 768
     branch1x1 = conv2d_bn(x, 192, 1, 1)
@@ -169,11 +251,12 @@ def InceptionV3(include_top=True, weights='imagenet',
     branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1)
     branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7)
 
-    branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same')(x)
+    branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x)
     branch_pool = conv2d_bn(branch_pool, 192, 1, 1)
-    x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool],
-              mode='concat', concat_axis=channel_axis,
-              name='mixed4')
+    x = layers.concatenate(
+        [branch1x1, branch7x7, branch7x7dbl, branch_pool],
+        axis=channel_axis,
+        name='mixed4')
 
     # mixed 5, 6: 17 x 17 x 768
     for i in range(2):
@@ -190,11 +273,12 @@ def InceptionV3(include_top=True, weights='imagenet',
         branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7)
 
         branch_pool = AveragePooling2D(
-            (3, 3), strides=(1, 1), border_mode='same')(x)
+            (3, 3), strides=(1, 1), padding='same')(x)
         branch_pool = conv2d_bn(branch_pool, 192, 1, 1)
-        x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool],
-                  mode='concat', concat_axis=channel_axis,
-                  name='mixed' + str(5 + i))
+        x = layers.concatenate(
+            [branch1x1, branch7x7, branch7x7dbl, branch_pool],
+            axis=channel_axis,
+            name='mixed' + str(5 + i))
 
     # mixed 7: 17 x 17 x 768
     branch1x1 = conv2d_bn(x, 192, 1, 1)
@@ -203,33 +287,33 @@ def InceptionV3(include_top=True, weights='imagenet',
     branch7x7 = conv2d_bn(branch7x7, 192, 1, 7)
     branch7x7 = conv2d_bn(branch7x7, 192, 7, 1)
 
-    branch7x7dbl = conv2d_bn(x, 160, 1, 1)
+    branch7x7dbl = conv2d_bn(x, 192, 1, 1)
     branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1)
     branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7)
     branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1)
     branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7)
 
-    branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same')(x)
+    branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x)
     branch_pool = conv2d_bn(branch_pool, 192, 1, 1)
-    x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool],
-              mode='concat', concat_axis=channel_axis,
-              name='mixed7')
+    x = layers.concatenate(
+        [branch1x1, branch7x7, branch7x7dbl, branch_pool],
+        axis=channel_axis,
+        name='mixed7')
 
     # mixed 8: 8 x 8 x 1280
     branch3x3 = conv2d_bn(x, 192, 1, 1)
     branch3x3 = conv2d_bn(branch3x3, 320, 3, 3,
-                          subsample=(2, 2), border_mode='valid')
+                          strides=(2, 2), padding='valid')
 
     branch7x7x3 = conv2d_bn(x, 192, 1, 1)
     branch7x7x3 = conv2d_bn(branch7x7x3, 192, 1, 7)
     branch7x7x3 = conv2d_bn(branch7x7x3, 192, 7, 1)
-    branch7x7x3 = conv2d_bn(branch7x7x3, 192, 3, 3,
-                            subsample=(2, 2), border_mode='valid')
+    branch7x7x3 = conv2d_bn(
+        branch7x7x3, 192, 3, 3, strides=(2, 2), padding='valid')
 
-    branch_pool = AveragePooling2D((3, 3), strides=(2, 2))(x)
-    x = merge([branch3x3, branch7x7x3, branch_pool],
-              mode='concat', concat_axis=channel_axis,
-              name='mixed8')
+    branch_pool = MaxPooling2D((3, 3), strides=(2, 2))(x)
+    x = layers.concatenate(
+        [branch3x3, branch7x7x3, branch_pool], axis=channel_axis, name='mixed8')
 
     # mixed 9: 8 x 8 x 2048
     for i in range(2):
@@ -238,71 +322,69 @@ def InceptionV3(include_top=True, weights='imagenet',
         branch3x3 = conv2d_bn(x, 384, 1, 1)
         branch3x3_1 = conv2d_bn(branch3x3, 384, 1, 3)
         branch3x3_2 = conv2d_bn(branch3x3, 384, 3, 1)
-        branch3x3 = merge([branch3x3_1, branch3x3_2],
-                          mode='concat', concat_axis=channel_axis,
-                          name='mixed9_' + str(i))
+        branch3x3 = layers.concatenate(
+            [branch3x3_1, branch3x3_2], axis=channel_axis, name='mixed9_' + str(i))
 
         branch3x3dbl = conv2d_bn(x, 448, 1, 1)
         branch3x3dbl = conv2d_bn(branch3x3dbl, 384, 3, 3)
         branch3x3dbl_1 = conv2d_bn(branch3x3dbl, 384, 1, 3)
         branch3x3dbl_2 = conv2d_bn(branch3x3dbl, 384, 3, 1)
-        branch3x3dbl = merge([branch3x3dbl_1, branch3x3dbl_2],
-                             mode='concat', concat_axis=channel_axis)
+        branch3x3dbl = layers.concatenate(
+            [branch3x3dbl_1, branch3x3dbl_2], axis=channel_axis)
 
         branch_pool = AveragePooling2D(
-            (3, 3), strides=(1, 1), border_mode='same')(x)
+            (3, 3), strides=(1, 1), padding='same')(x)
         branch_pool = conv2d_bn(branch_pool, 192, 1, 1)
-        x = merge([branch1x1, branch3x3, branch3x3dbl, branch_pool],
-                  mode='concat', concat_axis=channel_axis,
-                  name='mixed' + str(9 + i))
-
+        x = layers.concatenate(
+            [branch1x1, branch3x3, branch3x3dbl, branch_pool],
+            axis=channel_axis,
+            name='mixed' + str(9 + i))
     if include_top:
         # Classification block
-        x = AveragePooling2D((8, 8), strides=(8, 8), name='avg_pool')(x)
-        x = Flatten(name='flatten')(x)
-        x = Dense(1000, activation='softmax', name='predictions')(x)
-
-    # Create model
-    model = Model(img_input, x)
+        x = GlobalAveragePooling2D(name='avg_pool')(x)
+        x = Dense(classes, activation='softmax', name='predictions')(x)
+    else:
+        if pooling == 'avg':
+            x = GlobalAveragePooling2D()(x)
+        elif pooling == 'max':
+            x = GlobalMaxPooling2D()(x)
+
+    # Ensure that the model takes into account
+    # any potential predecessors of `input_tensor`.
+    if input_tensor is not None:
+        inputs = get_source_inputs(input_tensor)
+    else:
+        inputs = img_input
+    # Create model.
+    model = Model(inputs, x, name='inception_v3')
 
     # load weights
     if weights == 'imagenet':
-        if K.image_dim_ordering() == 'th':
-            if include_top:
-                weights_path = get_file('inception_v3_weights_th_dim_ordering_th_kernels.h5',
-                                        TH_WEIGHTS_PATH,
-                                        cache_subdir='models',
-                                        md5_hash='b3baf3070cc4bf476d43a2ea61b0ca5f')
-            else:
-                weights_path = get_file('inception_v3_weights_th_dim_ordering_th_kernels_notop.h5',
-                                        TH_WEIGHTS_PATH_NO_TOP,
-                                        cache_subdir='models',
-                                        md5_hash='79aaa90ab4372b4593ba3df64e142f05')
-            model.load_weights(weights_path)
+        if K.image_data_format() == 'channels_first':
             if K.backend() == 'tensorflow':
                 warnings.warn('You are using the TensorFlow backend, yet you '
                               'are using the Theano '
-                              'image dimension ordering convention '
-                              '(`image_dim_ordering="th"`). '
+                              'image data format convention '
+                              '(`image_data_format="channels_first"`). '
                               'For best performance, set '
-                              '`image_dim_ordering="tf"` in '
+                              '`image_data_format="channels_last"` in '
                               'your Keras config '
                               'at ~/.keras/keras.json.')
-                convert_all_kernels_in_model(model)
+        if include_top:
+            weights_path = get_file(
+                'inception_v3_weights_tf_dim_ordering_tf_kernels.h5',
+                WEIGHTS_PATH,
+                cache_subdir='models',
+                md5_hash='9a0d58056eeedaa3f26cb7ebd46da564')
         else:
-            if include_top:
-                weights_path = get_file('inception_v3_weights_tf_dim_ordering_tf_kernels.h5',
-                                        TF_WEIGHTS_PATH,
-                                        cache_subdir='models',
-                                        md5_hash='fe114b3ff2ea4bf891e9353d1bbfb32f')
-            else:
-                weights_path = get_file('inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5',
-                                        TF_WEIGHTS_PATH_NO_TOP,
-                                        cache_subdir='models',
-                                        md5_hash='2f3609166de1d967d1a481094754f691')
-            model.load_weights(weights_path)
-            if K.backend() == 'theano':
-                convert_all_kernels_in_model(model)
+            weights_path = get_file(
+                'inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5',
+                WEIGHTS_PATH_NO_TOP,
+                cache_subdir='models',
+                md5_hash='bcbd6486424b2319ff4ef7d526e38f63')
+        model.load_weights(weights_path)
+        if K.backend() == 'theano':
+            convert_all_kernels_in_model(model)
     return model
 
 
diff --git a/resnet50.py b/resnet50.py
index 271d271..b769031 100644
--- a/resnet50.py
+++ b/resnet50.py
@@ -12,113 +12,131 @@
 import numpy as np
 import warnings
 
-from keras.layers import merge, Input
-from keras.layers import Dense, Activation, Flatten
-from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D, AveragePooling2D
+from keras.layers import Input
+from keras import layers
+from keras.layers import Dense
+from keras.layers import Activation
+from keras.layers import Flatten
+from keras.layers import Conv2D
+from keras.layers import MaxPooling2D
+from keras.layers import GlobalMaxPooling2D
+from keras.layers import ZeroPadding2D
+from keras.layers import AveragePooling2D
+from keras.layers import GlobalAveragePooling2D
 from keras.layers import BatchNormalization
 from keras.models import Model
 from keras.preprocessing import image
 import keras.backend as K
-from keras.utils.layer_utils import convert_all_kernels_in_model
+from keras.utils import layer_utils
 from keras.utils.data_utils import get_file
-from imagenet_utils import decode_predictions, preprocess_input
+from keras.applications.imagenet_utils import decode_predictions
+from keras.applications.imagenet_utils import preprocess_input
+from keras.applications.imagenet_utils import _obtain_input_shape
+from keras.engine.topology import get_source_inputs
 
 
-TH_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_th_dim_ordering_th_kernels.h5'
-TF_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels.h5'
-TH_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_th_dim_ordering_th_kernels_notop.h5'
-TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
+WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels.h5'
+WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5'
 
 
 def identity_block(input_tensor, kernel_size, filters, stage, block):
-    '''The identity_block is the block that has no conv layer at shortcut
+    """The identity block is the block that has no conv layer at shortcut.
 
     # Arguments
         input_tensor: input tensor
         kernel_size: defualt 3, the kernel size of middle conv layer at main path
-        filters: list of integers, the nb_filters of 3 conv layer at main path
+        filters: list of integers, the filters of 3 conv layer at main path
         stage: integer, current stage label, used for generating layer names
         block: 'a','b'..., current block label, used for generating layer names
-    '''
-    nb_filter1, nb_filter2, nb_filter3 = filters
-    if K.image_dim_ordering() == 'tf':
+
+    # Returns
+        Output tensor for the block.
+    """
+    filters1, filters2, filters3 = filters
+    if K.image_data_format() == 'channels_last':
         bn_axis = 3
     else:
         bn_axis = 1
     conv_name_base = 'res' + str(stage) + block + '_branch'
     bn_name_base = 'bn' + str(stage) + block + '_branch'
 
-    x = Convolution2D(nb_filter1, 1, 1, name=conv_name_base + '2a')(input_tensor)
+    x = Conv2D(filters1, (1, 1), name=conv_name_base + '2a')(input_tensor)
     x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
     x = Activation('relu')(x)
 
-    x = Convolution2D(nb_filter2, kernel_size, kernel_size,
-                      border_mode='same', name=conv_name_base + '2b')(x)
+    x = Conv2D(filters2, kernel_size,
+               padding='same', name=conv_name_base + '2b')(x)
     x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
     x = Activation('relu')(x)
 
-    x = Convolution2D(nb_filter3, 1, 1, name=conv_name_base + '2c')(x)
+    x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x)
     x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
 
-    x = merge([x, input_tensor], mode='sum')
+    x = layers.add([x, input_tensor])
     x = Activation('relu')(x)
     return x
 
 
 def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)):
-    '''conv_block is the block that has a conv layer at shortcut
+    """conv_block is the block that has a conv layer at shortcut
 
     # Arguments
         input_tensor: input tensor
         kernel_size: defualt 3, the kernel size of middle conv layer at main path
-        filters: list of integers, the nb_filters of 3 conv layer at main path
+        filters: list of integers, the filters of 3 conv layer at main path
         stage: integer, current stage label, used for generating layer names
         block: 'a','b'..., current block label, used for generating layer names
 
-    Note that from stage 3, the first conv layer at main path is with subsample=(2,2)
-    And the shortcut should have subsample=(2,2) as well
-    '''
-    nb_filter1, nb_filter2, nb_filter3 = filters
-    if K.image_dim_ordering() == 'tf':
+    # Returns
+        Output tensor for the block.
+
+    Note that from stage 3, the first conv layer at main path is with strides=(2,2)
+    And the shortcut should have strides=(2,2) as well
+    """
+    filters1, filters2, filters3 = filters
+    if K.image_data_format() == 'channels_last':
         bn_axis = 3
     else:
         bn_axis = 1
     conv_name_base = 'res' + str(stage) + block + '_branch'
     bn_name_base = 'bn' + str(stage) + block + '_branch'
 
-    x = Convolution2D(nb_filter1, 1, 1, subsample=strides,
-                      name=conv_name_base + '2a')(input_tensor)
+    x = Conv2D(filters1, (1, 1), strides=strides,
+               name=conv_name_base + '2a')(input_tensor)
     x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x)
     x = Activation('relu')(x)
 
-    x = Convolution2D(nb_filter2, kernel_size, kernel_size, border_mode='same',
-                      name=conv_name_base + '2b')(x)
+    x = Conv2D(filters2, kernel_size, padding='same',
+               name=conv_name_base + '2b')(x)
     x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x)
     x = Activation('relu')(x)
 
-    x = Convolution2D(nb_filter3, 1, 1, name=conv_name_base + '2c')(x)
+    x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x)
     x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x)
 
-    shortcut = Convolution2D(nb_filter3, 1, 1, subsample=strides,
-                             name=conv_name_base + '1')(input_tensor)
+    shortcut = Conv2D(filters3, (1, 1), strides=strides,
+                      name=conv_name_base + '1')(input_tensor)
     shortcut = BatchNormalization(axis=bn_axis, name=bn_name_base + '1')(shortcut)
 
-    x = merge([x, shortcut], mode='sum')
+    x = layers.add([x, shortcut])
     x = Activation('relu')(x)
     return x
 
 
 def ResNet50(include_top=True, weights='imagenet',
-             input_tensor=None):
-    '''Instantiate the ResNet50 architecture,
-    optionally loading weights pre-trained
+             input_tensor=None, input_shape=None,
+             pooling=None,
+             classes=1000):
+    """Instantiates the ResNet50 architecture.
+
+    Optionally loads weights pre-trained
     on ImageNet. Note that when using TensorFlow,
     for best performance you should set
-    `image_dim_ordering="tf"` in your Keras config
+    `image_data_format="channels_last"` in your Keras config
     at ~/.keras/keras.json.
 
     The model and the weights are compatible with both
-    TensorFlow and Theano. The dimension ordering
+    TensorFlow and Theano. The data format
     convention used by the model is the one
     specified in your Keras config file.
 
@@ -127,42 +145,67 @@ def ResNet50(include_top=True, weights='imagenet',
             layers at the top of the network.
         weights: one of `None` (random initialization)
             or "imagenet" (pre-training on ImageNet).
-        input_tensor: optional Keras tensor (i.e. xput of `layers.Input()`)
+        input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
             to use as image input for the model.
+        input_shape: optional shape tuple, only to be specified
+            if `include_top` is False (otherwise the input shape
+            has to be `(224, 224, 3)` (with `channels_last` data format)
+            or `(3, 224, 224)` (with `channels_first` data format)).
+            It should have exactly 3 input channels,
+            and width and height should be no smaller than 197.
+            E.g. `(200, 200, 3)` would be one valid value.
+        pooling: Optional pooling mode for feature extraction
+            when `include_top` is `False`.
+            - `None` means that the output of the model will be
+                the 4D tensor output of the
+                last convolutional layer.
+            - `avg` means that global average pooling
+                will be applied to the output of the
+                last convolutional layer, and thus
+                the output of the model will be a 2D tensor.
+            - `max` means that global max pooling will
+                be applied.
+        classes: optional number of classes to classify images
+            into, only to be specified if `include_top` is True, and
+            if no `weights` argument is specified.
 
     # Returns
         A Keras model instance.
-    '''
+
+    # Raises
+        ValueError: in case of invalid argument for `weights`,
+            or invalid input shape.
+    """
     if weights not in {'imagenet', None}:
         raise ValueError('The `weights` argument should be either '
                          '`None` (random initialization) or `imagenet` '
                          '(pre-training on ImageNet).')
+
+    if weights == 'imagenet' and include_top and classes != 1000:
+        raise ValueError('If using `weights` as imagenet with `include_top`'
+                         ' as true, `classes` should be 1000')
+
     # Determine proper input shape
-    if K.image_dim_ordering() == 'th':
-        if include_top:
-            input_shape = (3, 224, 224)
-        else:
-            input_shape = (3, None, None)
-    else:
-        if include_top:
-            input_shape = (224, 224, 3)
-        else:
-            input_shape = (None, None, 3)
+    input_shape = _obtain_input_shape(input_shape,
+                                      default_size=224,
+                                      min_size=197,
+                                      data_format=K.image_data_format(),
+                                      include_top=include_top)
 
     if input_tensor is None:
         img_input = Input(shape=input_shape)
     else:
         if not K.is_keras_tensor(input_tensor):
-            img_input = Input(tensor=input_tensor)
+            img_input = Input(tensor=input_tensor, shape=input_shape)
         else:
             img_input = input_tensor
-    if K.image_dim_ordering() == 'tf':
+    if K.image_data_format() == 'channels_last':
         bn_axis = 3
     else:
         bn_axis = 1
 
     x = ZeroPadding2D((3, 3))(img_input)
-    x = Convolution2D(64, 7, 7, subsample=(2, 2), name='conv1')(x)
+    x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x)
     x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x)
     x = Activation('relu')(x)
     x = MaxPooling2D((3, 3), strides=(2, 2))(x)
@@ -191,49 +234,54 @@ def ResNet50(include_top=True, weights='imagenet',
 
     if include_top:
         x = Flatten()(x)
-        x = Dense(1000, activation='softmax', name='fc1000')(x)
-
-    model = Model(img_input, x)
+        x = Dense(classes, activation='softmax', name='fc1000')(x)
+    else:
+        if pooling == 'avg':
+            x = GlobalAveragePooling2D()(x)
+        elif pooling == 'max':
+            x = GlobalMaxPooling2D()(x)
+
+    # Ensure that the model takes into account
+    # any potential predecessors of `input_tensor`.
+    if input_tensor is not None:
+        inputs = get_source_inputs(input_tensor)
+    else:
+        inputs = img_input
+    # Create model.
+    model = Model(inputs, x, name='resnet50')
 
     # load weights
     if weights == 'imagenet':
-        print('K.image_dim_ordering:', K.image_dim_ordering())
-        if K.image_dim_ordering() == 'th':
+        if include_top:
+            weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels.h5',
+                                    WEIGHTS_PATH,
+                                    cache_subdir='models',
+                                    md5_hash='a7b3fe01876f51b976af0dea6bc144eb')
+        else:
+            weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
+                                    WEIGHTS_PATH_NO_TOP,
+                                    cache_subdir='models',
+                                    md5_hash='a268eb855778b3df3c7506639542a6af')
+        model.load_weights(weights_path)
+        if K.backend() == 'theano':
+            layer_utils.convert_all_kernels_in_model(model)
+
+        if K.image_data_format() == 'channels_first':
             if include_top:
-                weights_path = get_file('resnet50_weights_th_dim_ordering_th_kernels.h5',
-                                        TH_WEIGHTS_PATH,
-                                        cache_subdir='models',
-                                        md5_hash='1c1f8f5b0c8ee28fe9d950625a230e1c')
-            else:
-                weights_path = get_file('resnet50_weights_th_dim_ordering_th_kernels_notop.h5',
-                                        TH_WEIGHTS_PATH_NO_TOP,
-                                        cache_subdir='models',
-                                        md5_hash='f64f049c92468c9affcd44b0976cdafe')
-            model.load_weights(weights_path)
+                maxpool = model.get_layer(name='avg_pool')
+                shape = maxpool.output_shape[1:]
+                dense = model.get_layer(name='fc1000')
+                layer_utils.convert_dense_weights_data_format(dense, shape, 'channels_first')
+
             if K.backend() == 'tensorflow':
                 warnings.warn('You are using the TensorFlow backend, yet you '
                               'are using the Theano '
-                              'image dimension ordering convention '
-                              '(`image_dim_ordering="th"`). '
+                              'image data format convention '
+                              '(`image_data_format="channels_first"`). '
                               'For best performance, set '
-                              '`image_dim_ordering="tf"` in '
+                              '`image_data_format="channels_last"` in '
                               'your Keras config '
                               'at ~/.keras/keras.json.')
-                convert_all_kernels_in_model(model)
-        else:
-            if include_top:
-                weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels.h5',
-                                        TF_WEIGHTS_PATH,
-                                        cache_subdir='models',
-                                        md5_hash='a7b3fe01876f51b976af0dea6bc144eb')
-            else:
-                weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5',
-                                        TF_WEIGHTS_PATH_NO_TOP,
-                                        cache_subdir='models',
-                                        md5_hash='a268eb855778b3df3c7506639542a6af')
-            model.load_weights(weights_path)
-            if K.backend() == 'theano':
-                convert_all_kernels_in_model(model)
     return model
 
 
diff --git a/vgg16.py b/vgg16.py
index 5beaf2d..373c5c2 100644
--- a/vgg16.py
+++ b/vgg16.py
@@ -12,32 +12,41 @@
 import warnings
 
 from keras.models import Model
-from keras.layers import Flatten, Dense, Input
-from keras.layers import Convolution2D, MaxPooling2D
+from keras.layers import Flatten
+from keras.layers import Dense
+from keras.layers import Input
+from keras.layers import Conv2D
+from keras.layers import MaxPooling2D
+from keras.layers import GlobalMaxPooling2D
+from keras.layers import GlobalAveragePooling2D
 from keras.preprocessing import image
-from keras.utils.layer_utils import convert_all_kernels_in_model
+from keras.utils import layer_utils
 from keras.utils.data_utils import get_file
 from keras import backend as K
-from imagenet_utils import decode_predictions, preprocess_input
+from keras.applications.imagenet_utils import decode_predictions
+from keras.applications.imagenet_utils import preprocess_input
+from keras.applications.imagenet_utils import _obtain_input_shape
+from keras.engine.topology import get_source_inputs
 
 
-TH_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_th_dim_ordering_th_kernels.h5'
-TF_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5'
-TH_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_th_dim_ordering_th_kernels_notop.h5'
-TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
+WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5'
+WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5'
 
 
 def VGG16(include_top=True, weights='imagenet',
-          input_tensor=None):
-    '''Instantiate the VGG16 architecture,
-    optionally loading weights pre-trained
+          input_tensor=None, input_shape=None,
+          pooling=None,
+          classes=1000):
+    """Instantiates the VGG16 architecture.
+
+    Optionally loads weights pre-trained
     on ImageNet. Note that when using TensorFlow,
     for best performance you should set
-    `image_dim_ordering="tf"` in your Keras config
+    `image_data_format="channels_last"` in your Keras config
     at ~/.keras/keras.json.
 
     The model and the weights are compatible with both
-    TensorFlow and Theano. The dimension ordering
+    TensorFlow and Theano. The data format
     convention used by the model is the one
     specified in your Keras config file.
 
@@ -48,59 +57,83 @@ def VGG16(include_top=True, weights='imagenet',
             or "imagenet" (pre-training on ImageNet).
         input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
             to use as image input for the model.
+        input_shape: optional shape tuple, only to be specified
+            if `include_top` is False (otherwise the input shape
+            has to be `(224, 224, 3)` (with `channels_last` data format)
+            or `(3, 224, 224)` (with `channels_first` data format)).
+            It should have exactly 3 input channels,
+            and width and height should be no smaller than 48.
+            E.g. `(200, 200, 3)` would be one valid value.
+        pooling: Optional pooling mode for feature extraction
+            when `include_top` is `False`.
+            - `None` means that the output of the model will be
+                the 4D tensor output of the
+                last convolutional layer.
+            - `avg` means that global average pooling
+                will be applied to the output of the
+                last convolutional layer, and thus
+                the output of the model will be a 2D tensor.
+            - `max` means that global max pooling will
+                be applied.
+        classes: optional number of classes to classify images
+            into, only to be specified if `include_top` is True, and
+            if no `weights` argument is specified.
 
     # Returns
         A Keras model instance.
-    '''
+
+    # Raises
+        ValueError: in case of invalid argument for `weights`,
+            or invalid input shape.
+    """
     if weights not in {'imagenet', None}:
         raise ValueError('The `weights` argument should be either '
                          '`None` (random initialization) or `imagenet` '
                          '(pre-training on ImageNet).')
+
+    if weights == 'imagenet' and include_top and classes != 1000:
+        raise ValueError('If using `weights` as imagenet with `include_top`'
+                         ' as true, `classes` should be 1000')
     # Determine proper input shape
-    if K.image_dim_ordering() == 'th':
-        if include_top:
-            input_shape = (3, 224, 224)
-        else:
-            input_shape = (3, None, None)
-    else:
-        if include_top:
-            input_shape = (224, 224, 3)
-        else:
-            input_shape = (None, None, 3)
+    input_shape = _obtain_input_shape(input_shape,
+                                      default_size=224,
+                                      min_size=48,
+                                      data_format=K.image_data_format(),
+                                      include_top=include_top)
 
     if input_tensor is None:
         img_input = Input(shape=input_shape)
     else:
         if not K.is_keras_tensor(input_tensor):
-            img_input = Input(tensor=input_tensor)
+            img_input = Input(tensor=input_tensor, shape=input_shape)
         else:
             img_input = input_tensor
     # Block 1
-    x = Convolution2D(64, 3, 3, activation='relu', border_mode='same', name='block1_conv1')(img_input)
-    x = Convolution2D(64, 3, 3, activation='relu', border_mode='same', name='block1_conv2')(x)
+    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)
+    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
     x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)
 
     # Block 2
-    x = Convolution2D(128, 3, 3, activation='relu', border_mode='same', name='block2_conv1')(x)
-    x = Convolution2D(128, 3, 3, activation='relu', border_mode='same', name='block2_conv2')(x)
+    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
+    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
     x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)
 
     # Block 3
-    x = Convolution2D(256, 3, 3, activation='relu', border_mode='same', name='block3_conv1')(x)
-    x = Convolution2D(256, 3, 3, activation='relu', border_mode='same', name='block3_conv2')(x)
-    x = Convolution2D(256, 3, 3, activation='relu', border_mode='same', name='block3_conv3')(x)
+    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
+    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
+    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
     x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)
 
     # Block 4
-    x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block4_conv1')(x)
-    x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block4_conv2')(x)
-    x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block4_conv3')(x)
+    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
+    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
+    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
     x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)
 
     # Block 5
-    x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block5_conv1')(x)
-    x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block5_conv2')(x)
-    x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block5_conv3')(x)
+    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
+    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)
+    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)
     x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)
 
     if include_top:
@@ -108,46 +141,52 @@ def VGG16(include_top=True, weights='imagenet',
         x = Flatten(name='flatten')(x)
         x = Dense(4096, activation='relu', name='fc1')(x)
         x = Dense(4096, activation='relu', name='fc2')(x)
-        x = Dense(1000, activation='softmax', name='predictions')(x)
-
-    # Create model
-    model = Model(img_input, x)
+        x = Dense(classes, activation='softmax', name='predictions')(x)
+    else:
+        if pooling == 'avg':
+            x = GlobalAveragePooling2D()(x)
+        elif pooling == 'max':
+            x = GlobalMaxPooling2D()(x)
+
+    # Ensure that the model takes into account
+    # any potential predecessors of `input_tensor`.
+    if input_tensor is not None:
+        inputs = get_source_inputs(input_tensor)
+    else:
+        inputs = img_input
+    # Create model.
+    model = Model(inputs, x, name='vgg16')
 
     # load weights
     if weights == 'imagenet':
-        print('K.image_dim_ordering:', K.image_dim_ordering())
-        if K.image_dim_ordering() == 'th':
+        if include_top:
+            weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels.h5',
+                                    WEIGHTS_PATH,
+                                    cache_subdir='models')
+        else:
+            weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5',
+                                    WEIGHTS_PATH_NO_TOP,
+                                    cache_subdir='models')
+        model.load_weights(weights_path)
+        if K.backend() == 'theano':
+            layer_utils.convert_all_kernels_in_model(model)
+
+        if K.image_data_format() == 'channels_first':
             if include_top:
-                weights_path = get_file('vgg16_weights_th_dim_ordering_th_kernels.h5',
-                                        TH_WEIGHTS_PATH,
-                                        cache_subdir='models')
-            else:
-                weights_path = get_file('vgg16_weights_th_dim_ordering_th_kernels_notop.h5',
-                                        TH_WEIGHTS_PATH_NO_TOP,
-                                        cache_subdir='models')
-            model.load_weights(weights_path)
+                maxpool = model.get_layer(name='block5_pool')
+                shape = maxpool.output_shape[1:]
+                dense = model.get_layer(name='fc1')
+                layer_utils.convert_dense_weights_data_format(dense, shape, 'channels_first')
+
             if K.backend() == 'tensorflow':
                 warnings.warn('You are using the TensorFlow backend, yet you '
                               'are using the Theano '
-                              'image dimension ordering convention '
-                              '(`image_dim_ordering="th"`). '
+                              'image data format convention '
+                              '(`image_data_format="channels_first"`). '
                               'For best performance, set '
-                              '`image_dim_ordering="tf"` in '
+                              '`image_data_format="channels_last"` in '
                               'your Keras config '
                               'at ~/.keras/keras.json.')
-                convert_all_kernels_in_model(model)
-        else:
-            if include_top:
-                weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels.h5',
-                                        TF_WEIGHTS_PATH,
-                                        cache_subdir='models')
-            else:
-                weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5',
-                                        TF_WEIGHTS_PATH_NO_TOP,
-                                        cache_subdir='models')
-            model.load_weights(weights_path)
-            if K.backend() == 'theano':
-                convert_all_kernels_in_model(model)
     return model
 
 
diff --git a/vgg19.py b/vgg19.py
index 435d2e1..a9a64c7 100644
--- a/vgg19.py
+++ b/vgg19.py
@@ -13,31 +13,38 @@
 
 from keras.models import Model
 from keras.layers import Flatten, Dense, Input
-from keras.layers import Convolution2D, MaxPooling2D
+from keras.layers import Conv2D
+from keras.layers import MaxPooling2D
+from keras.layers import GlobalMaxPooling2D
+from keras.layers import GlobalAveragePooling2D
 from keras.preprocessing import image
-from keras.utils.layer_utils import convert_all_kernels_in_model
+from keras.utils import layer_utils
 from keras.utils.data_utils import get_file
 from keras import backend as K
-from imagenet_utils import decode_predictions, preprocess_input
+from keras.applications.imagenet_utils import decode_predictions
+from keras.applications.imagenet_utils import preprocess_input
+from keras.applications.imagenet_utils import _obtain_input_shape
+from keras.engine.topology import get_source_inputs
 
 
-TH_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_th_dim_ordering_th_kernels.h5'
-TF_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels.h5'
-TH_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_th_dim_ordering_th_kernels_notop.h5'
-TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5'
+WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels.h5'
+WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5'
 
 
 def VGG19(include_top=True, weights='imagenet',
-          input_tensor=None):
-    '''Instantiate the VGG19 architecture,
-    optionally loading weights pre-trained
+          input_tensor=None, input_shape=None,
+          pooling=None,
+          classes=1000):
+    """Instantiates the VGG19 architecture.
+
+    Optionally loads weights pre-trained
     on ImageNet. Note that when using TensorFlow,
     for best performance you should set
-    `image_dim_ordering="tf"` in your Keras config
+    `image_data_format="channels_last"` in your Keras config
     at ~/.keras/keras.json.
 
     The model and the weights are compatible with both
-    TensorFlow and Theano. The dimension ordering
+    TensorFlow and Theano. The data format
     convention used by the model is the one
     specified in your Keras config file.
 
@@ -48,62 +55,86 @@ def VGG19(include_top=True, weights='imagenet',
             or "imagenet" (pre-training on ImageNet).
         input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
             to use as image input for the model.
+        input_shape: optional shape tuple, only to be specified
+            if `include_top` is False (otherwise the input shape
+            has to be `(224, 224, 3)` (with `channels_last` data format)
+            or `(3, 224, 224)` (with `channels_first` data format)).
+            It should have exactly 3 input channels,
+            and width and height should be no smaller than 48.
+            E.g. `(200, 200, 3)` would be one valid value.
+        pooling: Optional pooling mode for feature extraction
+            when `include_top` is `False`.
+            - `None` means that the output of the model will be
+                the 4D tensor output of the
+                last convolutional layer.
+            - `avg` means that global average pooling
+                will be applied to the output of the
+                last convolutional layer, and thus
+                the output of the model will be a 2D tensor.
+            - `max` means that global max pooling will
+                be applied.
+        classes: optional number of classes to classify images
+            into, only to be specified if `include_top` is True, and
+            if no `weights` argument is specified.
 
     # Returns
         A Keras model instance.
-    '''
+
+    # Raises
+        ValueError: in case of invalid argument for `weights`,
+            or invalid input shape.
+    """
     if weights not in {'imagenet', None}:
         raise ValueError('The `weights` argument should be either '
                          '`None` (random initialization) or `imagenet` '
                          '(pre-training on ImageNet).')
+
+    if weights == 'imagenet' and include_top and classes != 1000:
+        raise ValueError('If using `weights` as imagenet with `include_top`'
+                         ' as true, `classes` should be 1000')
     # Determine proper input shape
-    if K.image_dim_ordering() == 'th':
-        if include_top:
-            input_shape = (3, 224, 224)
-        else:
-            input_shape = (3, None, None)
-    else:
-        if include_top:
-            input_shape = (224, 224, 3)
-        else:
-            input_shape = (None, None, 3)
+    input_shape = _obtain_input_shape(input_shape,
+                                      default_size=224,
+                                      min_size=48,
+                                      data_format=K.image_data_format(),
+                                      include_top=include_top)
 
     if input_tensor is None:
         img_input = Input(shape=input_shape)
     else:
         if not K.is_keras_tensor(input_tensor):
-            img_input = Input(tensor=input_tensor)
+            img_input = Input(tensor=input_tensor, shape=input_shape)
         else:
             img_input = input_tensor
     # Block 1
-    x = Convolution2D(64, 3, 3, activation='relu', border_mode='same', name='block1_conv1')(img_input)
-    x = Convolution2D(64, 3, 3, activation='relu', border_mode='same', name='block1_conv2')(x)
+    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input)
+    x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x)
     x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x)
 
     # Block 2
-    x = Convolution2D(128, 3, 3, activation='relu', border_mode='same', name='block2_conv1')(x)
-    x = Convolution2D(128, 3, 3, activation='relu', border_mode='same', name='block2_conv2')(x)
+    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x)
+    x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x)
     x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x)
 
     # Block 3
-    x = Convolution2D(256, 3, 3, activation='relu', border_mode='same', name='block3_conv1')(x)
-    x = Convolution2D(256, 3, 3, activation='relu', border_mode='same', name='block3_conv2')(x)
-    x = Convolution2D(256, 3, 3, activation='relu', border_mode='same', name='block3_conv3')(x)
-    x = Convolution2D(256, 3, 3, activation='relu', border_mode='same', name='block3_conv4')(x)
+    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x)
+    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x)
+    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x)
+    x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv4')(x)
     x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x)
 
     # Block 4
-    x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block4_conv1')(x)
-    x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block4_conv2')(x)
-    x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block4_conv3')(x)
-    x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block4_conv4')(x)
+    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x)
+    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x)
+    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x)
+    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv4')(x)
     x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x)
 
     # Block 5
-    x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block5_conv1')(x)
-    x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block5_conv2')(x)
-    x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block5_conv3')(x)
-    x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block5_conv4')(x)
+    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x)
+    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x)
+    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x)
+    x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv4')(x)
     x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x)
 
     if include_top:
@@ -111,46 +142,52 @@ def VGG19(include_top=True, weights='imagenet',
         x = Flatten(name='flatten')(x)
         x = Dense(4096, activation='relu', name='fc1')(x)
         x = Dense(4096, activation='relu', name='fc2')(x)
-        x = Dense(1000, activation='softmax', name='predictions')(x)
-
-    # Create model
-    model = Model(img_input, x)
+        x = Dense(classes, activation='softmax', name='predictions')(x)
+    else:
+        if pooling == 'avg':
+            x = GlobalAveragePooling2D()(x)
+        elif pooling == 'max':
+            x = GlobalMaxPooling2D()(x)
+
+    # Ensure that the model takes into account
+    # any potential predecessors of `input_tensor`.
+    if input_tensor is not None:
+        inputs = get_source_inputs(input_tensor)
+    else:
+        inputs = img_input
+    # Create model.
+    model = Model(inputs, x, name='vgg19')
 
     # load weights
     if weights == 'imagenet':
-        print('K.image_dim_ordering:', K.image_dim_ordering())
-        if K.image_dim_ordering() == 'th':
+        if include_top:
+            weights_path = get_file('vgg19_weights_tf_dim_ordering_tf_kernels.h5',
+                                    WEIGHTS_PATH,
+                                    cache_subdir='models')
+        else:
+            weights_path = get_file('vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5',
+                                    WEIGHTS_PATH_NO_TOP,
+                                    cache_subdir='models')
+        model.load_weights(weights_path)
+        if K.backend() == 'theano':
+            layer_utils.convert_all_kernels_in_model(model)
+
+        if K.image_data_format() == 'channels_first':
             if include_top:
-                weights_path = get_file('vgg19_weights_th_dim_ordering_th_kernels.h5',
-                                        TH_WEIGHTS_PATH,
-                                        cache_subdir='models')
-            else:
-                weights_path = get_file('vgg19_weights_th_dim_ordering_th_kernels_notop.h5',
-                                        TH_WEIGHTS_PATH_NO_TOP,
-                                        cache_subdir='models')
-            model.load_weights(weights_path)
+                maxpool = model.get_layer(name='block5_pool')
+                shape = maxpool.output_shape[1:]
+                dense = model.get_layer(name='fc1')
+                layer_utils.convert_dense_weights_data_format(dense, shape, 'channels_first')
+
             if K.backend() == 'tensorflow':
                 warnings.warn('You are using the TensorFlow backend, yet you '
                               'are using the Theano '
-                              'image dimension ordering convention '
-                              '(`image_dim_ordering="th"`). '
+                              'image data format convention '
+                              '(`image_data_format="channels_first"`). '
                               'For best performance, set '
-                              '`image_dim_ordering="tf"` in '
+                              '`image_data_format="channels_last"` in '
                               'your Keras config '
                               'at ~/.keras/keras.json.')
-                convert_all_kernels_in_model(model)
-        else:
-            if include_top:
-                weights_path = get_file('vgg19_weights_tf_dim_ordering_tf_kernels.h5',
-                                        TF_WEIGHTS_PATH,
-                                        cache_subdir='models')
-            else:
-                weights_path = get_file('vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5',
-                                        TF_WEIGHTS_PATH_NO_TOP,
-                                        cache_subdir='models')
-            model.load_weights(weights_path)
-            if K.backend() == 'theano':
-                convert_all_kernels_in_model(model)
     return model
 
 
diff --git a/xception.py b/xception.py
index bcf541c..b1797fc 100644
--- a/xception.py
+++ b/xception.py
@@ -23,13 +23,24 @@
 import warnings
 import numpy as np
 
-from keras.models import Model
-from keras.layers import Dense, Input, BatchNormalization, Activation, merge
-from keras.layers import Conv2D, SeparableConv2D, MaxPooling2D, GlobalAveragePooling2D
 from keras.preprocessing import image
+
+from keras.models import Model
+from keras import layers
+from keras.layers import Dense
+from keras.layers import Input
+from keras.layers import BatchNormalization
+from keras.layers import Activation
+from keras.layers import Conv2D
+from keras.layers import SeparableConv2D
+from keras.layers import MaxPooling2D
+from keras.layers import GlobalAveragePooling2D
+from keras.layers import GlobalMaxPooling2D
+from keras.engine.topology import get_source_inputs
 from keras.utils.data_utils import get_file
 from keras import backend as K
-from imagenet_utils import decode_predictions
+from keras.applications.imagenet_utils import decode_predictions
+from keras.applications.imagenet_utils import _obtain_input_shape
 
 
 TF_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.4/xception_weights_tf_dim_ordering_tf_kernels.h5'
@@ -37,13 +48,16 @@
 
 
 def Xception(include_top=True, weights='imagenet',
-             input_tensor=None):
-    '''Instantiate the Xception architecture,
-    optionally loading weights pre-trained
+             input_tensor=None, input_shape=None,
+             pooling=None,
+             classes=1000):
+    """Instantiates the Xception architecture.
+
+    Optionally loads weights pre-trained
     on ImageNet. This model is available for TensorFlow only,
     and can only be used with inputs following the TensorFlow
-    dimension ordering `(width, height, channels)`.
-    You should set `image_dim_ordering="tf"` in your Keras config
+    data format `(height, width, channels)`.
+    You should set `image_data_format="channels_last"` in your Keras config
     located at ~/.keras/keras.json.
 
     Note that the default input image size for this model is 299x299.
@@ -55,37 +69,69 @@ def Xception(include_top=True, weights='imagenet',
             or "imagenet" (pre-training on ImageNet).
         input_tensor: optional Keras tensor (i.e. output of `layers.Input()`)
             to use as image input for the model.
+        input_shape: optional shape tuple, only to be specified
+            if `include_top` is False (otherwise the input shape
+            has to be `(299, 299, 3)`).
+            It should have exactly 3 input channels,
+            and width and height should be no smaller than 71.
+            E.g. `(150, 150, 3)` would be one valid value.
+        pooling: Optional pooling mode for feature extraction
+            when `include_top` is `False`.
+            - `None` means that the output of the model will be
+                the 4D tensor output of the
+                last convolutional layer.
+            - `avg` means that global average pooling
+                will be applied to the output of the
+                last convolutional layer, and thus
+                the output of the model will be a 2D tensor.
+            - `max` means that global max pooling will
+                be applied.
+        classes: optional number of classes to classify images
+            into, only to be specified if `include_top` is True, and
+            if no `weights` argument is specified.
 
     # Returns
         A Keras model instance.
-    '''
+
+    # Raises
+        ValueError: in case of invalid argument for `weights`,
+            or invalid input shape.
+        RuntimeError: If attempting to run this model with a
+            backend that does not support separable convolutions.
+    """
     if weights not in {'imagenet', None}:
         raise ValueError('The `weights` argument should be either '
                          '`None` (random initialization) or `imagenet` '
                          '(pre-training on ImageNet).')
+
+    if weights == 'imagenet' and include_top and classes != 1000:
+        raise ValueError('If using `weights` as imagenet with `include_top`'
+                         ' as true, `classes` should be 1000')
+
     if K.backend() != 'tensorflow':
-        raise Exception('The Xception model is only available with '
-                        'the TensorFlow backend.')
-    if K.image_dim_ordering() != 'tf':
+        raise RuntimeError('The Xception model is only available with '
+                           'the TensorFlow backend.')
+    if K.image_data_format() != 'channels_last':
         warnings.warn('The Xception model is only available for the '
-                      'input dimension ordering "tf" '
+                      'input data format "channels_last" '
                       '(width, height, channels). '
                       'However your settings specify the default '
-                      'dimension ordering "th" (channels, width, height). '
-                      'You should set `image_dim_ordering="tf"` in your Keras '
+                      'data format "channels_first" (channels, width, height). '
+                      'You should set `image_data_format="channels_last"` in your Keras '
                       'config located at ~/.keras/keras.json. '
                       'The model being returned right now will expect inputs '
-                      'to follow the "tf" dimension ordering.')
-        K.set_image_dim_ordering('tf')
-        old_dim_ordering = 'th'
+                      'to follow the "channels_last" data format.')
+        K.set_image_data_format('channels_last')
+        old_data_format = 'channels_first'
     else:
-        old_dim_ordering = None
+        old_data_format = None
 
     # Determine proper input shape
-    if include_top:
-        input_shape = (299, 299, 3)
-    else:
-        input_shape = (None, None, 3)
+    input_shape = _obtain_input_shape(input_shape,
+                                      default_size=299,
+                                      min_size=71,
+                                      data_format=K.image_data_format(),
+                                      include_top=include_top)
 
     if input_tensor is None:
         img_input = Input(shape=input_shape)
@@ -95,98 +141,109 @@ def Xception(include_top=True, weights='imagenet',
         else:
             img_input = input_tensor
 
-    x = Conv2D(32, 3, 3, subsample=(2, 2), bias=False, name='block1_conv1')(img_input)
+    x = Conv2D(32, (3, 3), strides=(2, 2), use_bias=False, name='block1_conv1')(img_input)
     x = BatchNormalization(name='block1_conv1_bn')(x)
     x = Activation('relu', name='block1_conv1_act')(x)
-    x = Conv2D(64, 3, 3, bias=False, name='block1_conv2')(x)
+    x = Conv2D(64, (3, 3), use_bias=False, name='block1_conv2')(x)
     x = BatchNormalization(name='block1_conv2_bn')(x)
     x = Activation('relu', name='block1_conv2_act')(x)
 
-    residual = Conv2D(128, 1, 1, subsample=(2, 2),
-                      border_mode='same', bias=False)(x)
+    residual = Conv2D(128, (1, 1), strides=(2, 2),
+                      padding='same', use_bias=False)(x)
     residual = BatchNormalization()(residual)
 
-    x = SeparableConv2D(128, 3, 3, border_mode='same', bias=False, name='block2_sepconv1')(x)
+    x = SeparableConv2D(128, (3, 3), padding='same', use_bias=False, name='block2_sepconv1')(x)
     x = BatchNormalization(name='block2_sepconv1_bn')(x)
     x = Activation('relu', name='block2_sepconv2_act')(x)
-    x = SeparableConv2D(128, 3, 3, border_mode='same', bias=False, name='block2_sepconv2')(x)
+    x = SeparableConv2D(128, (3, 3), padding='same', use_bias=False, name='block2_sepconv2')(x)
     x = BatchNormalization(name='block2_sepconv2_bn')(x)
 
-    x = MaxPooling2D((3, 3), strides=(2, 2), border_mode='same', name='block2_pool')(x)
-    x = merge([x, residual], mode='sum')
+    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block2_pool')(x)
+    x = layers.add([x, residual])
 
-    residual = Conv2D(256, 1, 1, subsample=(2, 2),
-                      border_mode='same', bias=False)(x)
+    residual = Conv2D(256, (1, 1), strides=(2, 2),
+                      padding='same', use_bias=False)(x)
     residual = BatchNormalization()(residual)
 
     x = Activation('relu', name='block3_sepconv1_act')(x)
-    x = SeparableConv2D(256, 3, 3, border_mode='same', bias=False, name='block3_sepconv1')(x)
+    x = SeparableConv2D(256, (3, 3), padding='same', use_bias=False, name='block3_sepconv1')(x)
     x = BatchNormalization(name='block3_sepconv1_bn')(x)
     x = Activation('relu', name='block3_sepconv2_act')(x)
-    x = SeparableConv2D(256, 3, 3, border_mode='same', bias=False, name='block3_sepconv2')(x)
+    x = SeparableConv2D(256, (3, 3), padding='same', use_bias=False, name='block3_sepconv2')(x)
     x = BatchNormalization(name='block3_sepconv2_bn')(x)
 
-    x = MaxPooling2D((3, 3), strides=(2, 2), border_mode='same', name='block3_pool')(x)
-    x = merge([x, residual], mode='sum')
+    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block3_pool')(x)
+    x = layers.add([x, residual])
 
-    residual = Conv2D(728, 1, 1, subsample=(2, 2),
-                      border_mode='same', bias=False)(x)
+    residual = Conv2D(728, (1, 1), strides=(2, 2),
+                      padding='same', use_bias=False)(x)
     residual = BatchNormalization()(residual)
 
     x = Activation('relu', name='block4_sepconv1_act')(x)
-    x = SeparableConv2D(728, 3, 3, border_mode='same', bias=False, name='block4_sepconv1')(x)
+    x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block4_sepconv1')(x)
     x = BatchNormalization(name='block4_sepconv1_bn')(x)
     x = Activation('relu', name='block4_sepconv2_act')(x)
-    x = SeparableConv2D(728, 3, 3, border_mode='same', bias=False, name='block4_sepconv2')(x)
+    x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block4_sepconv2')(x)
     x = BatchNormalization(name='block4_sepconv2_bn')(x)
 
-    x = MaxPooling2D((3, 3), strides=(2, 2), border_mode='same', name='block4_pool')(x)
-    x = merge([x, residual], mode='sum')
+    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block4_pool')(x)
+    x = layers.add([x, residual])
 
     for i in range(8):
         residual = x
         prefix = 'block' + str(i + 5)
 
         x = Activation('relu', name=prefix + '_sepconv1_act')(x)
-        x = SeparableConv2D(728, 3, 3, border_mode='same', bias=False, name=prefix + '_sepconv1')(x)
+        x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv1')(x)
         x = BatchNormalization(name=prefix + '_sepconv1_bn')(x)
         x = Activation('relu', name=prefix + '_sepconv2_act')(x)
-        x = SeparableConv2D(728, 3, 3, border_mode='same', bias=False, name=prefix + '_sepconv2')(x)
+        x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv2')(x)
         x = BatchNormalization(name=prefix + '_sepconv2_bn')(x)
         x = Activation('relu', name=prefix + '_sepconv3_act')(x)
-        x = SeparableConv2D(728, 3, 3, border_mode='same', bias=False, name=prefix + '_sepconv3')(x)
+        x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv3')(x)
         x = BatchNormalization(name=prefix + '_sepconv3_bn')(x)
 
-        x = merge([x, residual], mode='sum')
+        x = layers.add([x, residual])
 
-    residual = Conv2D(1024, 1, 1, subsample=(2, 2),
-                      border_mode='same', bias=False)(x)
+    residual = Conv2D(1024, (1, 1), strides=(2, 2),
+                      padding='same', use_bias=False)(x)
     residual = BatchNormalization()(residual)
 
     x = Activation('relu', name='block13_sepconv1_act')(x)
-    x = SeparableConv2D(728, 3, 3, border_mode='same', bias=False, name='block13_sepconv1')(x)
+    x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block13_sepconv1')(x)
     x = BatchNormalization(name='block13_sepconv1_bn')(x)
     x = Activation('relu', name='block13_sepconv2_act')(x)
-    x = SeparableConv2D(1024, 3, 3, border_mode='same', bias=False, name='block13_sepconv2')(x)
+    x = SeparableConv2D(1024, (3, 3), padding='same', use_bias=False, name='block13_sepconv2')(x)
     x = BatchNormalization(name='block13_sepconv2_bn')(x)
 
-    x = MaxPooling2D((3, 3), strides=(2, 2), border_mode='same', name='block13_pool')(x)
-    x = merge([x, residual], mode='sum')
+    x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block13_pool')(x)
+    x = layers.add([x, residual])
 
-    x = SeparableConv2D(1536, 3, 3, border_mode='same', bias=False, name='block14_sepconv1')(x)
+    x = SeparableConv2D(1536, (3, 3), padding='same', use_bias=False, name='block14_sepconv1')(x)
     x = BatchNormalization(name='block14_sepconv1_bn')(x)
     x = Activation('relu', name='block14_sepconv1_act')(x)
 
-    x = SeparableConv2D(2048, 3, 3, border_mode='same', bias=False, name='block14_sepconv2')(x)
+    x = SeparableConv2D(2048, (3, 3), padding='same', use_bias=False, name='block14_sepconv2')(x)
     x = BatchNormalization(name='block14_sepconv2_bn')(x)
     x = Activation('relu', name='block14_sepconv2_act')(x)
 
     if include_top:
         x = GlobalAveragePooling2D(name='avg_pool')(x)
-        x = Dense(1000, activation='softmax', name='predictions')(x)
-
-    # Create model
-    model = Model(img_input, x)
+        x = Dense(classes, activation='softmax', name='predictions')(x)
+    else:
+        if pooling == 'avg':
+            x = GlobalAveragePooling2D()(x)
+        elif pooling == 'max':
+            x = GlobalMaxPooling2D()(x)
+
+    # Ensure that the model takes into account
+    # any potential predecessors of `input_tensor`.
+    if input_tensor is not None:
+        inputs = get_source_inputs(input_tensor)
+    else:
+        inputs = img_input
+    # Create model.
+    model = Model(inputs, x, name='xception')
 
     # load weights
     if weights == 'imagenet':
@@ -200,8 +257,8 @@ def Xception(include_top=True, weights='imagenet',
                                     cache_subdir='models')
         model.load_weights(weights_path)
 
-    if old_dim_ordering:
-        K.set_image_dim_ordering(old_dim_ordering)
+    if old_data_format:
+        K.set_image_data_format(old_data_format)
     return model
 
 
@@ -223,4 +280,5 @@ def preprocess_input(x):
     print('Input image shape:', x.shape)
 
     preds = model.predict(x)
+    print(np.argmax(preds))
     print('Predicted:', decode_predictions(preds, 1))