diff --git a/inception_v3.py b/inception_v3.py index 4fca301..dbb34d1 100644 --- a/inception_v3.py +++ b/inception_v3.py @@ -1,160 +1,242 @@ # -*- coding: utf-8 -*- -'''Inception V3 model for Keras. +"""Inception V3 model for Keras. -Note that the ImageNet weights provided are from a model that had not fully converged. -Inception v3 should be able to reach 6.9% top-5 error, but our model -only gets to 7.8% (same as a fully-converged ResNet 50). -For comparison, VGG16 only gets to 9.9%, quite a bit worse. +Note that the input image format for this model is different than for +the VGG16 and ResNet models (299x299 instead of 224x224), +and that the input preprocessing function is also different (same as Xception). -Also, do note that the input image format for this model is different than for -other models (299x299 instead of 224x224), and that the input preprocessing function -is also different. - -# Reference: +# Reference - [Rethinking the Inception Architecture for Computer Vision](http://arxiv.org/abs/1512.00567) -''' +""" from __future__ import print_function +from __future__ import absolute_import -import numpy as np import warnings +import numpy as np from keras.models import Model -from keras.layers import Flatten, Dense, Input, BatchNormalization, merge -from keras.layers import Convolution2D, MaxPooling2D, AveragePooling2D -from keras.preprocessing import image +from keras import layers +from keras.layers import Activation +from keras.layers import Dense +from keras.layers import Input +from keras.layers import BatchNormalization +from keras.layers import Conv2D +from keras.layers import MaxPooling2D +from keras.layers import AveragePooling2D +from keras.layers import GlobalAveragePooling2D +from keras.layers import GlobalMaxPooling2D +from keras.engine.topology import get_source_inputs from keras.utils.layer_utils import convert_all_kernels_in_model from keras.utils.data_utils import get_file from keras import backend as K -from imagenet_utils import 
decode_predictions +from keras.applications.imagenet_utils import decode_predictions +from keras.applications.imagenet_utils import _obtain_input_shape +from keras.preprocessing import image -TH_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/inception_v3_weights_th_dim_ordering_th_kernels.h5' -TF_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/inception_v3_weights_tf_dim_ordering_tf_kernels.h5' -TH_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/inception_v3_weights_th_dim_ordering_th_kernels_notop.h5' -TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5' +WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels.h5' +WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.5/inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5' -def conv2d_bn(x, nb_filter, nb_row, nb_col, - border_mode='same', subsample=(1, 1), +def conv2d_bn(x, + filters, + num_row, + num_col, + padding='same', + strides=(1, 1), name=None): - '''Utility function to apply conv + BN. - ''' + """Utility function to apply conv + BN. + + Arguments: + x: input tensor. + filters: filters in `Conv2D`. + num_row: height of the convolution kernel. + num_col: width of the convolution kernel. + padding: padding mode in `Conv2D`. + strides: strides in `Conv2D`. + name: name of the ops; will become `name + '_conv'` + for the convolution and `name + '_bn'` for the + batch norm layer. + + Returns: + Output tensor after applying `Conv2D` and `BatchNormalization`. 
+ """ if name is not None: bn_name = name + '_bn' conv_name = name + '_conv' else: bn_name = None conv_name = None - if K.image_dim_ordering() == 'th': + if K.image_data_format() == 'channels_first': bn_axis = 1 else: bn_axis = 3 - x = Convolution2D(nb_filter, nb_row, nb_col, - subsample=subsample, - activation='relu', - border_mode=border_mode, - name=conv_name)(x) - x = BatchNormalization(axis=bn_axis, name=bn_name)(x) + x = Conv2D( + filters, (num_row, num_col), + strides=strides, + padding=padding, + use_bias=False, + name=conv_name)(x) + x = BatchNormalization(axis=bn_axis, scale=False, name=bn_name)(x) + x = Activation('relu', name=name)(x) return x -def InceptionV3(include_top=True, weights='imagenet', - input_tensor=None): - '''Instantiate the Inception v3 architecture, - optionally loading weights pre-trained +def InceptionV3(include_top=True, + weights='imagenet', + input_tensor=None, + input_shape=None, + pooling=None, + classes=1000): + """Instantiates the Inception v3 architecture. + + Optionally loads weights pre-trained on ImageNet. Note that when using TensorFlow, for best performance you should set - `image_dim_ordering="tf"` in your Keras config + `image_data_format="channels_last"` in your Keras config at ~/.keras/keras.json. - The model and the weights are compatible with both - TensorFlow and Theano. The dimension ordering + TensorFlow and Theano. The data format convention used by the model is the one specified in your Keras config file. - Note that the default input image size for this model is 299x299. - # Arguments - include_top: whether to include the 3 fully-connected - layers at the top of the network. + Arguments: + include_top: whether to include the fully-connected + layer at the top of the network. weights: one of `None` (random initialization) or "imagenet" (pre-training on ImageNet). input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. 
- - # Returns + input_shape: optional shape tuple, only to be specified + if `include_top` is False (otherwise the input shape + has to be `(299, 299, 3)` (with `channels_last` data format) + or `(3, 299, 299)` (with `channels_first` data format)). + It should have exactly 3 input channels, + and width and height should be no smaller than 139. + E.g. `(150, 150, 3)` would be one valid value. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. + + Returns: A Keras model instance. - ''' + + Raises: + ValueError: in case of invalid argument for `weights`, + or invalid input shape. 
+ """ if weights not in {'imagenet', None}: raise ValueError('The `weights` argument should be either ' '`None` (random initialization) or `imagenet` ' '(pre-training on ImageNet).') + + if weights == 'imagenet' and include_top and classes != 1000: + raise ValueError('If using `weights` as imagenet with `include_top`' + ' as true, `classes` should be 1000') + # Determine proper input shape - if K.image_dim_ordering() == 'th': - if include_top: - input_shape = (3, 299, 299) - else: - input_shape = (3, None, None) - else: - if include_top: - input_shape = (299, 299, 3) - else: - input_shape = (None, None, 3) + input_shape = _obtain_input_shape( + input_shape, + default_size=299, + min_size=139, + data_format=K.image_data_format(), + include_top=include_top) if input_tensor is None: img_input = Input(shape=input_shape) else: - if not K.is_keras_tensor(input_tensor): - img_input = Input(tensor=input_tensor) - else: - img_input = input_tensor + img_input = Input(tensor=input_tensor, shape=input_shape) - if K.image_dim_ordering() == 'th': + if K.image_data_format() == 'channels_first': channel_axis = 1 else: channel_axis = 3 - x = conv2d_bn(img_input, 32, 3, 3, subsample=(2, 2), border_mode='valid') - x = conv2d_bn(x, 32, 3, 3, border_mode='valid') + x = conv2d_bn(img_input, 32, 3, 3, strides=(2, 2), padding='valid') + x = conv2d_bn(x, 32, 3, 3, padding='valid') x = conv2d_bn(x, 64, 3, 3) x = MaxPooling2D((3, 3), strides=(2, 2))(x) - x = conv2d_bn(x, 80, 1, 1, border_mode='valid') - x = conv2d_bn(x, 192, 3, 3, border_mode='valid') + x = conv2d_bn(x, 80, 1, 1, padding='valid') + x = conv2d_bn(x, 192, 3, 3, padding='valid') x = MaxPooling2D((3, 3), strides=(2, 2))(x) # mixed 0, 1, 2: 35 x 35 x 256 - for i in range(3): - branch1x1 = conv2d_bn(x, 64, 1, 1) + branch1x1 = conv2d_bn(x, 64, 1, 1) + + branch5x5 = conv2d_bn(x, 48, 1, 1) + branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) + + branch3x3dbl = conv2d_bn(x, 64, 1, 1) + branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) + 
branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch5x5 = conv2d_bn(x, 48, 1, 1) - branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) + branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) + branch_pool = conv2d_bn(branch_pool, 32, 1, 1) + x = layers.concatenate( + [branch1x1, branch5x5, branch3x3dbl, branch_pool], + axis=channel_axis, + name='mixed0') - branch3x3dbl = conv2d_bn(x, 64, 1, 1) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) + # mixed 1: 35 x 35 x 288 + branch1x1 = conv2d_bn(x, 64, 1, 1) - branch_pool = AveragePooling2D( - (3, 3), strides=(1, 1), border_mode='same')(x) - branch_pool = conv2d_bn(branch_pool, 32, 1, 1) - x = merge([branch1x1, branch5x5, branch3x3dbl, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed' + str(i)) + branch5x5 = conv2d_bn(x, 48, 1, 1) + branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) + + branch3x3dbl = conv2d_bn(x, 64, 1, 1) + branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) + branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) + + branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) + branch_pool = conv2d_bn(branch_pool, 64, 1, 1) + x = layers.concatenate( + [branch1x1, branch5x5, branch3x3dbl, branch_pool], + axis=channel_axis, + name='mixed1') + + # mixed 2: 35 x 35 x 288 + branch1x1 = conv2d_bn(x, 64, 1, 1) + + branch5x5 = conv2d_bn(x, 48, 1, 1) + branch5x5 = conv2d_bn(branch5x5, 64, 5, 5) + + branch3x3dbl = conv2d_bn(x, 64, 1, 1) + branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) + branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) + + branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) + branch_pool = conv2d_bn(branch_pool, 64, 1, 1) + x = layers.concatenate( + [branch1x1, branch5x5, branch3x3dbl, branch_pool], + axis=channel_axis, + name='mixed2') # mixed 3: 17 x 17 x 768 - branch3x3 = conv2d_bn(x, 384, 3, 3, subsample=(2, 2), border_mode='valid') + branch3x3 = conv2d_bn(x, 384, 3, 3, 
strides=(2, 2), padding='valid') branch3x3dbl = conv2d_bn(x, 64, 1, 1) branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3) - branch3x3dbl = conv2d_bn(branch3x3dbl, 96, 3, 3, - subsample=(2, 2), border_mode='valid') + branch3x3dbl = conv2d_bn( + branch3x3dbl, 96, 3, 3, strides=(2, 2), padding='valid') branch_pool = MaxPooling2D((3, 3), strides=(2, 2))(x) - x = merge([branch3x3, branch3x3dbl, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed3') + x = layers.concatenate( + [branch3x3, branch3x3dbl, branch_pool], axis=channel_axis, name='mixed3') # mixed 4: 17 x 17 x 768 branch1x1 = conv2d_bn(x, 192, 1, 1) @@ -169,11 +251,12 @@ def InceptionV3(include_top=True, weights='imagenet', branch7x7dbl = conv2d_bn(branch7x7dbl, 128, 7, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same')(x) + branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed4') + x = layers.concatenate( + [branch1x1, branch7x7, branch7x7dbl, branch_pool], + axis=channel_axis, + name='mixed4') # mixed 5, 6: 17 x 17 x 768 for i in range(2): @@ -190,11 +273,12 @@ def InceptionV3(include_top=True, weights='imagenet', branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) branch_pool = AveragePooling2D( - (3, 3), strides=(1, 1), border_mode='same')(x) + (3, 3), strides=(1, 1), padding='same')(x) branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed' + str(5 + i)) + x = layers.concatenate( + [branch1x1, branch7x7, branch7x7dbl, branch_pool], + axis=channel_axis, + name='mixed' + str(5 + i)) # mixed 7: 17 x 17 x 768 branch1x1 = conv2d_bn(x, 192, 1, 1) @@ -203,33 +287,33 @@ def InceptionV3(include_top=True, weights='imagenet', 
branch7x7 = conv2d_bn(branch7x7, 192, 1, 7) branch7x7 = conv2d_bn(branch7x7, 192, 7, 1) - branch7x7dbl = conv2d_bn(x, 160, 1, 1) + branch7x7dbl = conv2d_bn(x, 192, 1, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 7, 1) branch7x7dbl = conv2d_bn(branch7x7dbl, 192, 1, 7) - branch_pool = AveragePooling2D((3, 3), strides=(1, 1), border_mode='same')(x) + branch_pool = AveragePooling2D((3, 3), strides=(1, 1), padding='same')(x) branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = merge([branch1x1, branch7x7, branch7x7dbl, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed7') + x = layers.concatenate( + [branch1x1, branch7x7, branch7x7dbl, branch_pool], + axis=channel_axis, + name='mixed7') # mixed 8: 8 x 8 x 1280 branch3x3 = conv2d_bn(x, 192, 1, 1) branch3x3 = conv2d_bn(branch3x3, 320, 3, 3, - subsample=(2, 2), border_mode='valid') + strides=(2, 2), padding='valid') branch7x7x3 = conv2d_bn(x, 192, 1, 1) branch7x7x3 = conv2d_bn(branch7x7x3, 192, 1, 7) branch7x7x3 = conv2d_bn(branch7x7x3, 192, 7, 1) - branch7x7x3 = conv2d_bn(branch7x7x3, 192, 3, 3, - subsample=(2, 2), border_mode='valid') + branch7x7x3 = conv2d_bn( + branch7x7x3, 192, 3, 3, strides=(2, 2), padding='valid') - branch_pool = AveragePooling2D((3, 3), strides=(2, 2))(x) - x = merge([branch3x3, branch7x7x3, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed8') + branch_pool = MaxPooling2D((3, 3), strides=(2, 2))(x) + x = layers.concatenate( + [branch3x3, branch7x7x3, branch_pool], axis=channel_axis, name='mixed8') # mixed 9: 8 x 8 x 2048 for i in range(2): @@ -238,71 +322,69 @@ def InceptionV3(include_top=True, weights='imagenet', branch3x3 = conv2d_bn(x, 384, 1, 1) branch3x3_1 = conv2d_bn(branch3x3, 384, 1, 3) branch3x3_2 = conv2d_bn(branch3x3, 384, 3, 1) - branch3x3 = merge([branch3x3_1, branch3x3_2], - mode='concat', concat_axis=channel_axis, - name='mixed9_' + str(i)) + 
branch3x3 = layers.concatenate( + [branch3x3_1, branch3x3_2], axis=channel_axis, name='mixed9_' + str(i)) branch3x3dbl = conv2d_bn(x, 448, 1, 1) branch3x3dbl = conv2d_bn(branch3x3dbl, 384, 3, 3) branch3x3dbl_1 = conv2d_bn(branch3x3dbl, 384, 1, 3) branch3x3dbl_2 = conv2d_bn(branch3x3dbl, 384, 3, 1) - branch3x3dbl = merge([branch3x3dbl_1, branch3x3dbl_2], - mode='concat', concat_axis=channel_axis) + branch3x3dbl = layers.concatenate( + [branch3x3dbl_1, branch3x3dbl_2], axis=channel_axis) branch_pool = AveragePooling2D( - (3, 3), strides=(1, 1), border_mode='same')(x) + (3, 3), strides=(1, 1), padding='same')(x) branch_pool = conv2d_bn(branch_pool, 192, 1, 1) - x = merge([branch1x1, branch3x3, branch3x3dbl, branch_pool], - mode='concat', concat_axis=channel_axis, - name='mixed' + str(9 + i)) - + x = layers.concatenate( + [branch1x1, branch3x3, branch3x3dbl, branch_pool], + axis=channel_axis, + name='mixed' + str(9 + i)) if include_top: # Classification block - x = AveragePooling2D((8, 8), strides=(8, 8), name='avg_pool')(x) - x = Flatten(name='flatten')(x) - x = Dense(1000, activation='softmax', name='predictions')(x) - - # Create model - model = Model(img_input, x) + x = GlobalAveragePooling2D(name='avg_pool')(x) + x = Dense(classes, activation='softmax', name='predictions')(x) + else: + if pooling == 'avg': + x = GlobalAveragePooling2D()(x) + elif pooling == 'max': + x = GlobalMaxPooling2D()(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = get_source_inputs(input_tensor) + else: + inputs = img_input + # Create model. 
+ model = Model(inputs, x, name='inception_v3') # load weights if weights == 'imagenet': - if K.image_dim_ordering() == 'th': - if include_top: - weights_path = get_file('inception_v3_weights_th_dim_ordering_th_kernels.h5', - TH_WEIGHTS_PATH, - cache_subdir='models', - md5_hash='b3baf3070cc4bf476d43a2ea61b0ca5f') - else: - weights_path = get_file('inception_v3_weights_th_dim_ordering_th_kernels_notop.h5', - TH_WEIGHTS_PATH_NO_TOP, - cache_subdir='models', - md5_hash='79aaa90ab4372b4593ba3df64e142f05') - model.load_weights(weights_path) + if K.image_data_format() == 'channels_first': if K.backend() == 'tensorflow': warnings.warn('You are using the TensorFlow backend, yet you ' 'are using the Theano ' - 'image dimension ordering convention ' - '(`image_dim_ordering="th"`). ' + 'image data format convention ' + '(`image_data_format="channels_first"`). ' 'For best performance, set ' - '`image_dim_ordering="tf"` in ' + '`image_data_format="channels_last"` in ' 'your Keras config ' 'at ~/.keras/keras.json.') - convert_all_kernels_in_model(model) + if include_top: + weights_path = get_file( + 'inception_v3_weights_tf_dim_ordering_tf_kernels.h5', + WEIGHTS_PATH, + cache_subdir='models', + md5_hash='9a0d58056eeedaa3f26cb7ebd46da564') else: - if include_top: - weights_path = get_file('inception_v3_weights_tf_dim_ordering_tf_kernels.h5', - TF_WEIGHTS_PATH, - cache_subdir='models', - md5_hash='fe114b3ff2ea4bf891e9353d1bbfb32f') - else: - weights_path = get_file('inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5', - TF_WEIGHTS_PATH_NO_TOP, - cache_subdir='models', - md5_hash='2f3609166de1d967d1a481094754f691') - model.load_weights(weights_path) - if K.backend() == 'theano': - convert_all_kernels_in_model(model) + weights_path = get_file( + 'inception_v3_weights_tf_dim_ordering_tf_kernels_notop.h5', + WEIGHTS_PATH_NO_TOP, + cache_subdir='models', + md5_hash='bcbd6486424b2319ff4ef7d526e38f63') + model.load_weights(weights_path) + if K.backend() == 'theano': + 
convert_all_kernels_in_model(model) return model diff --git a/resnet50.py b/resnet50.py index 271d271..b769031 100644 --- a/resnet50.py +++ b/resnet50.py @@ -12,113 +12,131 @@ import numpy as np import warnings -from keras.layers import merge, Input -from keras.layers import Dense, Activation, Flatten -from keras.layers import Convolution2D, MaxPooling2D, ZeroPadding2D, AveragePooling2D +from keras.layers import Input +from keras import layers +from keras.layers import Dense +from keras.layers import Activation +from keras.layers import Flatten +from keras.layers import Conv2D +from keras.layers import MaxPooling2D +from keras.layers import GlobalMaxPooling2D +from keras.layers import ZeroPadding2D +from keras.layers import AveragePooling2D +from keras.layers import GlobalAveragePooling2D from keras.layers import BatchNormalization from keras.models import Model from keras.preprocessing import image import keras.backend as K -from keras.utils.layer_utils import convert_all_kernels_in_model +from keras.utils import layer_utils from keras.utils.data_utils import get_file -from imagenet_utils import decode_predictions, preprocess_input +from keras.applications.imagenet_utils import decode_predictions +from keras.applications.imagenet_utils import preprocess_input +from keras.applications.imagenet_utils import _obtain_input_shape +from keras.engine.topology import get_source_inputs -TH_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_th_dim_ordering_th_kernels.h5' -TF_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels.h5' -TH_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_th_dim_ordering_th_kernels_notop.h5' -TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' 
+WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels.h5' +WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.2/resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5' def identity_block(input_tensor, kernel_size, filters, stage, block): - '''The identity_block is the block that has no conv layer at shortcut + """The identity block is the block that has no conv layer at shortcut. # Arguments input_tensor: input tensor kernel_size: defualt 3, the kernel size of middle conv layer at main path - filters: list of integers, the nb_filters of 3 conv layer at main path + filters: list of integers, the filters of 3 conv layer at main path stage: integer, current stage label, used for generating layer names block: 'a','b'..., current block label, used for generating layer names - ''' - nb_filter1, nb_filter2, nb_filter3 = filters - if K.image_dim_ordering() == 'tf': + + # Returns + Output tensor for the block. 
+ """ + filters1, filters2, filters3 = filters + if K.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 conv_name_base = 'res' + str(stage) + block + '_branch' bn_name_base = 'bn' + str(stage) + block + '_branch' - x = Convolution2D(nb_filter1, 1, 1, name=conv_name_base + '2a')(input_tensor) + x = Conv2D(filters1, (1, 1), name=conv_name_base + '2a')(input_tensor) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) x = Activation('relu')(x) - x = Convolution2D(nb_filter2, kernel_size, kernel_size, - border_mode='same', name=conv_name_base + '2b')(x) + x = Conv2D(filters2, kernel_size, + padding='same', name=conv_name_base + '2b')(x) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) x = Activation('relu')(x) - x = Convolution2D(nb_filter3, 1, 1, name=conv_name_base + '2c')(x) + x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) - x = merge([x, input_tensor], mode='sum') + x = layers.add([x, input_tensor]) x = Activation('relu')(x) return x def conv_block(input_tensor, kernel_size, filters, stage, block, strides=(2, 2)): - '''conv_block is the block that has a conv layer at shortcut + """conv_block is the block that has a conv layer at shortcut # Arguments input_tensor: input tensor kernel_size: defualt 3, the kernel size of middle conv layer at main path - filters: list of integers, the nb_filters of 3 conv layer at main path + filters: list of integers, the filters of 3 conv layer at main path stage: integer, current stage label, used for generating layer names block: 'a','b'..., current block label, used for generating layer names - Note that from stage 3, the first conv layer at main path is with subsample=(2,2) - And the shortcut should have subsample=(2,2) as well - ''' - nb_filter1, nb_filter2, nb_filter3 = filters - if K.image_dim_ordering() == 'tf': + # Returns + Output tensor for the block. 
+ + Note that from stage 3, the first conv layer at main path is with strides=(2,2) + And the shortcut should have strides=(2,2) as well + """ + filters1, filters2, filters3 = filters + if K.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 conv_name_base = 'res' + str(stage) + block + '_branch' bn_name_base = 'bn' + str(stage) + block + '_branch' - x = Convolution2D(nb_filter1, 1, 1, subsample=strides, - name=conv_name_base + '2a')(input_tensor) + x = Conv2D(filters1, (1, 1), strides=strides, + name=conv_name_base + '2a')(input_tensor) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2a')(x) x = Activation('relu')(x) - x = Convolution2D(nb_filter2, kernel_size, kernel_size, border_mode='same', - name=conv_name_base + '2b')(x) + x = Conv2D(filters2, kernel_size, padding='same', + name=conv_name_base + '2b')(x) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2b')(x) x = Activation('relu')(x) - x = Convolution2D(nb_filter3, 1, 1, name=conv_name_base + '2c')(x) + x = Conv2D(filters3, (1, 1), name=conv_name_base + '2c')(x) x = BatchNormalization(axis=bn_axis, name=bn_name_base + '2c')(x) - shortcut = Convolution2D(nb_filter3, 1, 1, subsample=strides, - name=conv_name_base + '1')(input_tensor) + shortcut = Conv2D(filters3, (1, 1), strides=strides, + name=conv_name_base + '1')(input_tensor) shortcut = BatchNormalization(axis=bn_axis, name=bn_name_base + '1')(shortcut) - x = merge([x, shortcut], mode='sum') + x = layers.add([x, shortcut]) x = Activation('relu')(x) return x def ResNet50(include_top=True, weights='imagenet', - input_tensor=None): - '''Instantiate the ResNet50 architecture, - optionally loading weights pre-trained + input_tensor=None, input_shape=None, + pooling=None, + classes=1000): + """Instantiates the ResNet50 architecture. + + Optionally loads weights pre-trained on ImageNet. 
Note that when using TensorFlow, for best performance you should set - `image_dim_ordering="tf"` in your Keras config + `image_data_format="channels_last"` in your Keras config at ~/.keras/keras.json. The model and the weights are compatible with both - TensorFlow and Theano. The dimension ordering + TensorFlow and Theano. The data format convention used by the model is the one specified in your Keras config file. @@ -127,42 +145,67 @@ def ResNet50(include_top=True, weights='imagenet', layers at the top of the network. weights: one of `None` (random initialization) or "imagenet" (pre-training on ImageNet). - input_tensor: optional Keras tensor (i.e. xput of `layers.Input()`) + input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. + input_shape: optional shape tuple, only to be specified + if `include_top` is False (otherwise the input shape + has to be `(224, 224, 3)` (with `channels_last` data format) + or `(3, 224, 224)` (with `channels_first` data format)). + It should have exactly 3 input channels, + and width and height should be no smaller than 197. + E.g. `(200, 200, 3)` would be one valid value. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. # Returns A Keras model instance. - ''' + + # Raises + ValueError: in case of invalid argument for `weights`, + or invalid input shape. 
+ """ if weights not in {'imagenet', None}: raise ValueError('The `weights` argument should be either ' '`None` (random initialization) or `imagenet` ' '(pre-training on ImageNet).') + + if weights == 'imagenet' and include_top and classes != 1000: + raise ValueError('If using `weights` as imagenet with `include_top`' + ' as true, `classes` should be 1000') + # Determine proper input shape - if K.image_dim_ordering() == 'th': - if include_top: - input_shape = (3, 224, 224) - else: - input_shape = (3, None, None) - else: - if include_top: - input_shape = (224, 224, 3) - else: - input_shape = (None, None, 3) + input_shape = _obtain_input_shape(input_shape, + default_size=224, + min_size=197, + data_format=K.image_data_format(), + include_top=include_top) if input_tensor is None: img_input = Input(shape=input_shape) else: if not K.is_keras_tensor(input_tensor): - img_input = Input(tensor=input_tensor) + img_input = Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor - if K.image_dim_ordering() == 'tf': + if K.image_data_format() == 'channels_last': bn_axis = 3 else: bn_axis = 1 x = ZeroPadding2D((3, 3))(img_input) - x = Convolution2D(64, 7, 7, subsample=(2, 2), name='conv1')(x) + x = Conv2D(64, (7, 7), strides=(2, 2), name='conv1')(x) x = BatchNormalization(axis=bn_axis, name='bn_conv1')(x) x = Activation('relu')(x) x = MaxPooling2D((3, 3), strides=(2, 2))(x) @@ -191,49 +234,54 @@ def ResNet50(include_top=True, weights='imagenet', if include_top: x = Flatten()(x) - x = Dense(1000, activation='softmax', name='fc1000')(x) - - model = Model(img_input, x) + x = Dense(classes, activation='softmax', name='fc1000')(x) + else: + if pooling == 'avg': + x = GlobalAveragePooling2D()(x) + elif pooling == 'max': + x = GlobalMaxPooling2D()(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = get_source_inputs(input_tensor) + else: + inputs = img_input + # Create model. 
+ model = Model(inputs, x, name='resnet50') # load weights if weights == 'imagenet': - print('K.image_dim_ordering:', K.image_dim_ordering()) - if K.image_dim_ordering() == 'th': + if include_top: + weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels.h5', + WEIGHTS_PATH, + cache_subdir='models', + md5_hash='a7b3fe01876f51b976af0dea6bc144eb') + else: + weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5', + WEIGHTS_PATH_NO_TOP, + cache_subdir='models', + md5_hash='a268eb855778b3df3c7506639542a6af') + model.load_weights(weights_path) + if K.backend() == 'theano': + layer_utils.convert_all_kernels_in_model(model) + + if K.image_data_format() == 'channels_first': if include_top: - weights_path = get_file('resnet50_weights_th_dim_ordering_th_kernels.h5', - TH_WEIGHTS_PATH, - cache_subdir='models', - md5_hash='1c1f8f5b0c8ee28fe9d950625a230e1c') - else: - weights_path = get_file('resnet50_weights_th_dim_ordering_th_kernels_notop.h5', - TH_WEIGHTS_PATH_NO_TOP, - cache_subdir='models', - md5_hash='f64f049c92468c9affcd44b0976cdafe') - model.load_weights(weights_path) + maxpool = model.get_layer(name='avg_pool') + shape = maxpool.output_shape[1:] + dense = model.get_layer(name='fc1000') + layer_utils.convert_dense_weights_data_format(dense, shape, 'channels_first') + if K.backend() == 'tensorflow': warnings.warn('You are using the TensorFlow backend, yet you ' 'are using the Theano ' - 'image dimension ordering convention ' - '(`image_dim_ordering="th"`). ' + 'image data format convention ' + '(`image_data_format="channels_first"`). 
' 'For best performance, set ' - '`image_dim_ordering="tf"` in ' + '`image_data_format="channels_last"` in ' 'your Keras config ' 'at ~/.keras/keras.json.') - convert_all_kernels_in_model(model) - else: - if include_top: - weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels.h5', - TF_WEIGHTS_PATH, - cache_subdir='models', - md5_hash='a7b3fe01876f51b976af0dea6bc144eb') - else: - weights_path = get_file('resnet50_weights_tf_dim_ordering_tf_kernels_notop.h5', - TF_WEIGHTS_PATH_NO_TOP, - cache_subdir='models', - md5_hash='a268eb855778b3df3c7506639542a6af') - model.load_weights(weights_path) - if K.backend() == 'theano': - convert_all_kernels_in_model(model) return model diff --git a/vgg16.py b/vgg16.py index 5beaf2d..373c5c2 100644 --- a/vgg16.py +++ b/vgg16.py @@ -12,32 +12,41 @@ import warnings from keras.models import Model -from keras.layers import Flatten, Dense, Input -from keras.layers import Convolution2D, MaxPooling2D +from keras.layers import Flatten +from keras.layers import Dense +from keras.layers import Input +from keras.layers import Conv2D +from keras.layers import MaxPooling2D +from keras.layers import GlobalMaxPooling2D +from keras.layers import GlobalAveragePooling2D from keras.preprocessing import image -from keras.utils.layer_utils import convert_all_kernels_in_model +from keras.utils import layer_utils from keras.utils.data_utils import get_file from keras import backend as K -from imagenet_utils import decode_predictions, preprocess_input +from keras.applications.imagenet_utils import decode_predictions +from keras.applications.imagenet_utils import preprocess_input +from keras.applications.imagenet_utils import _obtain_input_shape +from keras.engine.topology import get_source_inputs -TH_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_th_dim_ordering_th_kernels.h5' -TF_WEIGHTS_PATH = 
'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5' -TH_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_th_dim_ordering_th_kernels_notop.h5' -TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5' +WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels.h5' +WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5' def VGG16(include_top=True, weights='imagenet', - input_tensor=None): - '''Instantiate the VGG16 architecture, - optionally loading weights pre-trained + input_tensor=None, input_shape=None, + pooling=None, + classes=1000): + """Instantiates the VGG16 architecture. + + Optionally loads weights pre-trained on ImageNet. Note that when using TensorFlow, for best performance you should set - `image_dim_ordering="tf"` in your Keras config + `image_data_format="channels_last"` in your Keras config at ~/.keras/keras.json. The model and the weights are compatible with both - TensorFlow and Theano. The dimension ordering + TensorFlow and Theano. The data format convention used by the model is the one specified in your Keras config file. @@ -48,59 +57,83 @@ def VGG16(include_top=True, weights='imagenet', or "imagenet" (pre-training on ImageNet). input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. + input_shape: optional shape tuple, only to be specified + if `include_top` is False (otherwise the input shape + has to be `(224, 224, 3)` (with `channels_last` data format) + or `(3, 224, 224)` (with `channels_first` data format)). + It should have exactly 3 input channels, + and width and height should be no smaller than 48. + E.g.
`(200, 200, 3)` would be one valid value. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. # Returns A Keras model instance. - ''' + + # Raises + ValueError: in case of invalid argument for `weights`, + or invalid input shape. + """ if weights not in {'imagenet', None}: raise ValueError('The `weights` argument should be either ' '`None` (random initialization) or `imagenet` ' '(pre-training on ImageNet).') + + if weights == 'imagenet' and include_top and classes != 1000: + raise ValueError('If using `weights` as imagenet with `include_top`' + ' as true, `classes` should be 1000') # Determine proper input shape - if K.image_dim_ordering() == 'th': - if include_top: - input_shape = (3, 224, 224) - else: - input_shape = (3, None, None) - else: - if include_top: - input_shape = (224, 224, 3) - else: - input_shape = (None, None, 3) + input_shape = _obtain_input_shape(input_shape, + default_size=224, + min_size=48, + data_format=K.image_data_format(), + include_top=include_top) if input_tensor is None: img_input = Input(shape=input_shape) else: if not K.is_keras_tensor(input_tensor): - img_input = Input(tensor=input_tensor) + img_input = Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor # Block 1 - x = Convolution2D(64, 3, 3, activation='relu', border_mode='same', name='block1_conv1')(img_input) - x = Convolution2D(64, 3, 3, activation='relu', border_mode='same', name='block1_conv2')(x) + x = Conv2D(64, (3, 3), 
activation='relu', padding='same', name='block1_conv1')(img_input) + x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) # Block 2 - x = Convolution2D(128, 3, 3, activation='relu', border_mode='same', name='block2_conv1')(x) - x = Convolution2D(128, 3, 3, activation='relu', border_mode='same', name='block2_conv2')(x) + x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x) + x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) # Block 3 - x = Convolution2D(256, 3, 3, activation='relu', border_mode='same', name='block3_conv1')(x) - x = Convolution2D(256, 3, 3, activation='relu', border_mode='same', name='block3_conv2')(x) - x = Convolution2D(256, 3, 3, activation='relu', border_mode='same', name='block3_conv3')(x) + x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x) + x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x) + x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) # Block 4 - x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block4_conv1')(x) - x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block4_conv2')(x) - x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block4_conv3')(x) + x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x) + x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x) + x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) # Block 5 - x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block5_conv1')(x) - x = Convolution2D(512, 
3, 3, activation='relu', border_mode='same', name='block5_conv2')(x) - x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block5_conv3')(x) + x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x) + x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x) + x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) if include_top: @@ -108,46 +141,52 @@ def VGG16(include_top=True, weights='imagenet', x = Flatten(name='flatten')(x) x = Dense(4096, activation='relu', name='fc1')(x) x = Dense(4096, activation='relu', name='fc2')(x) - x = Dense(1000, activation='softmax', name='predictions')(x) - - # Create model - model = Model(img_input, x) + x = Dense(classes, activation='softmax', name='predictions')(x) + else: + if pooling == 'avg': + x = GlobalAveragePooling2D()(x) + elif pooling == 'max': + x = GlobalMaxPooling2D()(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = get_source_inputs(input_tensor) + else: + inputs = img_input + # Create model. 
+ model = Model(inputs, x, name='vgg16') # load weights if weights == 'imagenet': - print('K.image_dim_ordering:', K.image_dim_ordering()) - if K.image_dim_ordering() == 'th': + if include_top: + weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels.h5', + WEIGHTS_PATH, + cache_subdir='models') + else: + weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5', + WEIGHTS_PATH_NO_TOP, + cache_subdir='models') + model.load_weights(weights_path) + if K.backend() == 'theano': + layer_utils.convert_all_kernels_in_model(model) + + if K.image_data_format() == 'channels_first': if include_top: - weights_path = get_file('vgg16_weights_th_dim_ordering_th_kernels.h5', - TH_WEIGHTS_PATH, - cache_subdir='models') - else: - weights_path = get_file('vgg16_weights_th_dim_ordering_th_kernels_notop.h5', - TH_WEIGHTS_PATH_NO_TOP, - cache_subdir='models') - model.load_weights(weights_path) + maxpool = model.get_layer(name='block5_pool') + shape = maxpool.output_shape[1:] + dense = model.get_layer(name='fc1') + layer_utils.convert_dense_weights_data_format(dense, shape, 'channels_first') + if K.backend() == 'tensorflow': warnings.warn('You are using the TensorFlow backend, yet you ' 'are using the Theano ' - 'image dimension ordering convention ' - '(`image_dim_ordering="th"`). ' + 'image data format convention ' + '(`image_data_format="channels_first"`). 
' 'For best performance, set ' - '`image_dim_ordering="tf"` in ' + '`image_data_format="channels_last"` in ' 'your Keras config ' 'at ~/.keras/keras.json.') - convert_all_kernels_in_model(model) - else: - if include_top: - weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels.h5', - TF_WEIGHTS_PATH, - cache_subdir='models') - else: - weights_path = get_file('vgg16_weights_tf_dim_ordering_tf_kernels_notop.h5', - TF_WEIGHTS_PATH_NO_TOP, - cache_subdir='models') - model.load_weights(weights_path) - if K.backend() == 'theano': - convert_all_kernels_in_model(model) return model diff --git a/vgg19.py b/vgg19.py index 435d2e1..a9a64c7 100644 --- a/vgg19.py +++ b/vgg19.py @@ -13,31 +13,38 @@ from keras.models import Model from keras.layers import Flatten, Dense, Input -from keras.layers import Convolution2D, MaxPooling2D +from keras.layers import Conv2D +from keras.layers import MaxPooling2D +from keras.layers import GlobalMaxPooling2D +from keras.layers import GlobalAveragePooling2D from keras.preprocessing import image -from keras.utils.layer_utils import convert_all_kernels_in_model +from keras.utils import layer_utils from keras.utils.data_utils import get_file from keras import backend as K -from imagenet_utils import decode_predictions, preprocess_input +from keras.applications.imagenet_utils import decode_predictions +from keras.applications.imagenet_utils import preprocess_input +from keras.applications.imagenet_utils import _obtain_input_shape +from keras.engine.topology import get_source_inputs -TH_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_th_dim_ordering_th_kernels.h5' -TF_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels.h5' -TH_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_th_dim_ordering_th_kernels_notop.h5' -TF_WEIGHTS_PATH_NO_TOP = 
'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels.h5' -TH_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_th_dim_ordering_th_kernels_notop.h5' -TF_WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5' +WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels.h5' +WEIGHTS_PATH_NO_TOP = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.1/vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5' def VGG19(include_top=True, weights='imagenet', - input_tensor=None): - '''Instantiate the VGG19 architecture, - optionally loading weights pre-trained + input_tensor=None, input_shape=None, + pooling=None, + classes=1000): + """Instantiates the VGG19 architecture. + + Optionally loads weights pre-trained on ImageNet. Note that when using TensorFlow, for best performance you should set - `image_dim_ordering="tf"` in your Keras config + `image_data_format="channels_last"` in your Keras config at ~/.keras/keras.json. The model and the weights are compatible with both - TensorFlow and Theano. The dimension ordering + TensorFlow and Theano. The data format convention used by the model is the one specified in your Keras config file. @@ -48,62 +55,86 @@ def VGG19(include_top=True, weights='imagenet', or "imagenet" (pre-training on ImageNet). input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. + input_shape: optional shape tuple, only to be specified + if `include_top` is False (otherwise the input shape + has to be `(224, 224, 3)` (with `channels_last` data format) + or `(3, 224, 224)` (with `channels_first` data format)). + It should have exactly 3 input channels, + and width and height should be no smaller than 48. + E.g. `(200, 200, 3)` would be one valid value. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer.
+ - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. # Returns A Keras model instance. - ''' + + # Raises + ValueError: in case of invalid argument for `weights`, + or invalid input shape. + """ if weights not in {'imagenet', None}: raise ValueError('The `weights` argument should be either ' '`None` (random initialization) or `imagenet` ' '(pre-training on ImageNet).') + + if weights == 'imagenet' and include_top and classes != 1000: + raise ValueError('If using `weights` as imagenet with `include_top`' + ' as true, `classes` should be 1000') # Determine proper input shape - if K.image_dim_ordering() == 'th': - if include_top: - input_shape = (3, 224, 224) - else: - input_shape = (3, None, None) - else: - if include_top: - input_shape = (224, 224, 3) - else: - input_shape = (None, None, 3) + input_shape = _obtain_input_shape(input_shape, + default_size=224, + min_size=48, + data_format=K.image_data_format(), + include_top=include_top) if input_tensor is None: img_input = Input(shape=input_shape) else: if not K.is_keras_tensor(input_tensor): - img_input = Input(tensor=input_tensor) + img_input = Input(tensor=input_tensor, shape=input_shape) else: img_input = input_tensor # Block 1 - x = Convolution2D(64, 3, 3, activation='relu', border_mode='same', name='block1_conv1')(img_input) - x = Convolution2D(64, 3, 3, activation='relu', border_mode='same', name='block1_conv2')(x) + x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv1')(img_input) + x = Conv2D(64, (3, 3), activation='relu', padding='same', name='block1_conv2')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block1_pool')(x) # Block 2 - x = Convolution2D(128, 
3, 3, activation='relu', border_mode='same', name='block2_conv1')(x) - x = Convolution2D(128, 3, 3, activation='relu', border_mode='same', name='block2_conv2')(x) + x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv1')(x) + x = Conv2D(128, (3, 3), activation='relu', padding='same', name='block2_conv2')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block2_pool')(x) # Block 3 - x = Convolution2D(256, 3, 3, activation='relu', border_mode='same', name='block3_conv1')(x) - x = Convolution2D(256, 3, 3, activation='relu', border_mode='same', name='block3_conv2')(x) - x = Convolution2D(256, 3, 3, activation='relu', border_mode='same', name='block3_conv3')(x) - x = Convolution2D(256, 3, 3, activation='relu', border_mode='same', name='block3_conv4')(x) + x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv1')(x) + x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv2')(x) + x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv3')(x) + x = Conv2D(256, (3, 3), activation='relu', padding='same', name='block3_conv4')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block3_pool')(x) # Block 4 - x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block4_conv1')(x) - x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block4_conv2')(x) - x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block4_conv3')(x) - x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block4_conv4')(x) + x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv1')(x) + x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv2')(x) + x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv3')(x) + x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block4_conv4')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block4_pool')(x) # Block 5 - x = 
Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block5_conv1')(x) - x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block5_conv2')(x) - x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block5_conv3')(x) - x = Convolution2D(512, 3, 3, activation='relu', border_mode='same', name='block5_conv4')(x) + x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv1')(x) + x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv2')(x) + x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv3')(x) + x = Conv2D(512, (3, 3), activation='relu', padding='same', name='block5_conv4')(x) x = MaxPooling2D((2, 2), strides=(2, 2), name='block5_pool')(x) if include_top: @@ -111,46 +142,52 @@ def VGG19(include_top=True, weights='imagenet', x = Flatten(name='flatten')(x) x = Dense(4096, activation='relu', name='fc1')(x) x = Dense(4096, activation='relu', name='fc2')(x) - x = Dense(1000, activation='softmax', name='predictions')(x) - - # Create model - model = Model(img_input, x) + x = Dense(classes, activation='softmax', name='predictions')(x) + else: + if pooling == 'avg': + x = GlobalAveragePooling2D()(x) + elif pooling == 'max': + x = GlobalMaxPooling2D()(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = get_source_inputs(input_tensor) + else: + inputs = img_input + # Create model. 
+ model = Model(inputs, x, name='vgg19') # load weights if weights == 'imagenet': - print('K.image_dim_ordering:', K.image_dim_ordering()) - if K.image_dim_ordering() == 'th': + if include_top: + weights_path = get_file('vgg19_weights_tf_dim_ordering_tf_kernels.h5', + WEIGHTS_PATH, + cache_subdir='models') + else: + weights_path = get_file('vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5', + WEIGHTS_PATH_NO_TOP, + cache_subdir='models') + model.load_weights(weights_path) + if K.backend() == 'theano': + layer_utils.convert_all_kernels_in_model(model) + + if K.image_data_format() == 'channels_first': if include_top: - weights_path = get_file('vgg19_weights_th_dim_ordering_th_kernels.h5', - TH_WEIGHTS_PATH, - cache_subdir='models') - else: - weights_path = get_file('vgg19_weights_th_dim_ordering_th_kernels_notop.h5', - TH_WEIGHTS_PATH_NO_TOP, - cache_subdir='models') - model.load_weights(weights_path) + maxpool = model.get_layer(name='block5_pool') + shape = maxpool.output_shape[1:] + dense = model.get_layer(name='fc1') + layer_utils.convert_dense_weights_data_format(dense, shape, 'channels_first') + if K.backend() == 'tensorflow': warnings.warn('You are using the TensorFlow backend, yet you ' 'are using the Theano ' - 'image dimension ordering convention ' - '(`image_dim_ordering="th"`). ' + 'image data format convention ' + '(`image_data_format="channels_first"`). 
' 'For best performance, set ' - '`image_dim_ordering="tf"` in ' + '`image_data_format="channels_last"` in ' 'your Keras config ' 'at ~/.keras/keras.json.') - convert_all_kernels_in_model(model) - else: - if include_top: - weights_path = get_file('vgg19_weights_tf_dim_ordering_tf_kernels.h5', - TF_WEIGHTS_PATH, - cache_subdir='models') - else: - weights_path = get_file('vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5', - TF_WEIGHTS_PATH_NO_TOP, - cache_subdir='models') - model.load_weights(weights_path) - if K.backend() == 'theano': - convert_all_kernels_in_model(model) return model diff --git a/xception.py b/xception.py index bcf541c..b1797fc 100644 --- a/xception.py +++ b/xception.py @@ -23,13 +23,24 @@ import warnings import numpy as np -from keras.models import Model -from keras.layers import Dense, Input, BatchNormalization, Activation, merge -from keras.layers import Conv2D, SeparableConv2D, MaxPooling2D, GlobalAveragePooling2D from keras.preprocessing import image + +from keras.models import Model +from keras import layers +from keras.layers import Dense +from keras.layers import Input +from keras.layers import BatchNormalization +from keras.layers import Activation +from keras.layers import Conv2D +from keras.layers import SeparableConv2D +from keras.layers import MaxPooling2D +from keras.layers import GlobalAveragePooling2D +from keras.layers import GlobalMaxPooling2D +from keras.engine.topology import get_source_inputs from keras.utils.data_utils import get_file from keras import backend as K -from imagenet_utils import decode_predictions +from keras.applications.imagenet_utils import decode_predictions +from keras.applications.imagenet_utils import _obtain_input_shape TF_WEIGHTS_PATH = 'https://github.com/fchollet/deep-learning-models/releases/download/v0.4/xception_weights_tf_dim_ordering_tf_kernels.h5' @@ -37,13 +48,16 @@ def Xception(include_top=True, weights='imagenet', - input_tensor=None): - '''Instantiate the Xception architecture, - optionally 
loading weights pre-trained + input_tensor=None, input_shape=None, + pooling=None, + classes=1000): + """Instantiates the Xception architecture. + + Optionally loads weights pre-trained on ImageNet. This model is available for TensorFlow only, and can only be used with inputs following the TensorFlow - dimension ordering `(width, height, channels)`. - You should set `image_dim_ordering="tf"` in your Keras config + data format `(width, height, channels)`. + You should set `image_data_format="channels_last"` in your Keras config located at ~/.keras/keras.json. Note that the default input image size for this model is 299x299. @@ -55,37 +69,69 @@ def Xception(include_top=True, weights='imagenet', or "imagenet" (pre-training on ImageNet). input_tensor: optional Keras tensor (i.e. output of `layers.Input()`) to use as image input for the model. + input_shape: optional shape tuple, only to be specified + if `include_top` is False (otherwise the input shape + has to be `(299, 299, 3)`). + It should have exactly 3 input channels, + and width and height should be no smaller than 71. + E.g. `(150, 150, 3)` would be one valid value. + pooling: Optional pooling mode for feature extraction + when `include_top` is `False`. + - `None` means that the output of the model will be + the 4D tensor output of the + last convolutional layer. + - `avg` means that global average pooling + will be applied to the output of the + last convolutional layer, and thus + the output of the model will be a 2D tensor. + - `max` means that global max pooling will + be applied. + classes: optional number of classes to classify images + into, only to be specified if `include_top` is True, and + if no `weights` argument is specified. # Returns A Keras model instance. - ''' + + # Raises + ValueError: in case of invalid argument for `weights`, + or invalid input shape. + RuntimeError: If attempting to run this model with a + backend that does not support separable convolutions.
+ """ if weights not in {'imagenet', None}: raise ValueError('The `weights` argument should be either ' '`None` (random initialization) or `imagenet` ' '(pre-training on ImageNet).') + + if weights == 'imagenet' and include_top and classes != 1000: + raise ValueError('If using `weights` as imagenet with `include_top`' + ' as true, `classes` should be 1000') + if K.backend() != 'tensorflow': - raise Exception('The Xception model is only available with ' - 'the TensorFlow backend.') - if K.image_dim_ordering() != 'tf': + raise RuntimeError('The Xception model is only available with ' + 'the TensorFlow backend.') + if K.image_data_format() != 'channels_last': warnings.warn('The Xception model is only available for the ' - 'input dimension ordering "tf" ' + 'input data format "channels_last" ' '(width, height, channels). ' 'However your settings specify the default ' - 'dimension ordering "th" (channels, width, height). ' - 'You should set `image_dim_ordering="tf"` in your Keras ' + 'data format "channels_first" (channels, width, height). ' + 'You should set `image_data_format="channels_last"` in your Keras ' 'config located at ~/.keras/keras.json. 
' 'The model being returned right now will expect inputs ' - 'to follow the "tf" dimension ordering.') - K.set_image_dim_ordering('tf') - old_dim_ordering = 'th' + 'to follow the "channels_last" data format.') + K.set_image_data_format('channels_last') + old_data_format = 'channels_first' else: - old_dim_ordering = None + old_data_format = None # Determine proper input shape - if include_top: - input_shape = (299, 299, 3) - else: - input_shape = (None, None, 3) + input_shape = _obtain_input_shape(input_shape, + default_size=299, + min_size=71, + data_format=K.image_data_format(), + include_top=include_top) if input_tensor is None: img_input = Input(shape=input_shape) @@ -95,98 +141,109 @@ def Xception(include_top=True, weights='imagenet', else: img_input = input_tensor - x = Conv2D(32, 3, 3, subsample=(2, 2), bias=False, name='block1_conv1')(img_input) + x = Conv2D(32, (3, 3), strides=(2, 2), use_bias=False, name='block1_conv1')(img_input) x = BatchNormalization(name='block1_conv1_bn')(x) x = Activation('relu', name='block1_conv1_act')(x) - x = Conv2D(64, 3, 3, bias=False, name='block1_conv2')(x) + x = Conv2D(64, (3, 3), use_bias=False, name='block1_conv2')(x) x = BatchNormalization(name='block1_conv2_bn')(x) x = Activation('relu', name='block1_conv2_act')(x) - residual = Conv2D(128, 1, 1, subsample=(2, 2), - border_mode='same', bias=False)(x) + residual = Conv2D(128, (1, 1), strides=(2, 2), + padding='same', use_bias=False)(x) residual = BatchNormalization()(residual) - x = SeparableConv2D(128, 3, 3, border_mode='same', bias=False, name='block2_sepconv1')(x) + x = SeparableConv2D(128, (3, 3), padding='same', use_bias=False, name='block2_sepconv1')(x) x = BatchNormalization(name='block2_sepconv1_bn')(x) x = Activation('relu', name='block2_sepconv2_act')(x) - x = SeparableConv2D(128, 3, 3, border_mode='same', bias=False, name='block2_sepconv2')(x) + x = SeparableConv2D(128, (3, 3), padding='same', use_bias=False, name='block2_sepconv2')(x) x = 
BatchNormalization(name='block2_sepconv2_bn')(x) - x = MaxPooling2D((3, 3), strides=(2, 2), border_mode='same', name='block2_pool')(x) - x = merge([x, residual], mode='sum') + x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block2_pool')(x) + x = layers.add([x, residual]) - residual = Conv2D(256, 1, 1, subsample=(2, 2), - border_mode='same', bias=False)(x) + residual = Conv2D(256, (1, 1), strides=(2, 2), + padding='same', use_bias=False)(x) residual = BatchNormalization()(residual) x = Activation('relu', name='block3_sepconv1_act')(x) - x = SeparableConv2D(256, 3, 3, border_mode='same', bias=False, name='block3_sepconv1')(x) + x = SeparableConv2D(256, (3, 3), padding='same', use_bias=False, name='block3_sepconv1')(x) x = BatchNormalization(name='block3_sepconv1_bn')(x) x = Activation('relu', name='block3_sepconv2_act')(x) - x = SeparableConv2D(256, 3, 3, border_mode='same', bias=False, name='block3_sepconv2')(x) + x = SeparableConv2D(256, (3, 3), padding='same', use_bias=False, name='block3_sepconv2')(x) x = BatchNormalization(name='block3_sepconv2_bn')(x) - x = MaxPooling2D((3, 3), strides=(2, 2), border_mode='same', name='block3_pool')(x) - x = merge([x, residual], mode='sum') + x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block3_pool')(x) + x = layers.add([x, residual]) - residual = Conv2D(728, 1, 1, subsample=(2, 2), - border_mode='same', bias=False)(x) + residual = Conv2D(728, (1, 1), strides=(2, 2), + padding='same', use_bias=False)(x) residual = BatchNormalization()(residual) x = Activation('relu', name='block4_sepconv1_act')(x) - x = SeparableConv2D(728, 3, 3, border_mode='same', bias=False, name='block4_sepconv1')(x) + x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block4_sepconv1')(x) x = BatchNormalization(name='block4_sepconv1_bn')(x) x = Activation('relu', name='block4_sepconv2_act')(x) - x = SeparableConv2D(728, 3, 3, border_mode='same', bias=False, name='block4_sepconv2')(x) + x = 
SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block4_sepconv2')(x) x = BatchNormalization(name='block4_sepconv2_bn')(x) - x = MaxPooling2D((3, 3), strides=(2, 2), border_mode='same', name='block4_pool')(x) - x = merge([x, residual], mode='sum') + x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block4_pool')(x) + x = layers.add([x, residual]) for i in range(8): residual = x prefix = 'block' + str(i + 5) x = Activation('relu', name=prefix + '_sepconv1_act')(x) - x = SeparableConv2D(728, 3, 3, border_mode='same', bias=False, name=prefix + '_sepconv1')(x) + x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv1')(x) x = BatchNormalization(name=prefix + '_sepconv1_bn')(x) x = Activation('relu', name=prefix + '_sepconv2_act')(x) - x = SeparableConv2D(728, 3, 3, border_mode='same', bias=False, name=prefix + '_sepconv2')(x) + x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv2')(x) x = BatchNormalization(name=prefix + '_sepconv2_bn')(x) x = Activation('relu', name=prefix + '_sepconv3_act')(x) - x = SeparableConv2D(728, 3, 3, border_mode='same', bias=False, name=prefix + '_sepconv3')(x) + x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name=prefix + '_sepconv3')(x) x = BatchNormalization(name=prefix + '_sepconv3_bn')(x) - x = merge([x, residual], mode='sum') + x = layers.add([x, residual]) - residual = Conv2D(1024, 1, 1, subsample=(2, 2), - border_mode='same', bias=False)(x) + residual = Conv2D(1024, (1, 1), strides=(2, 2), + padding='same', use_bias=False)(x) residual = BatchNormalization()(residual) x = Activation('relu', name='block13_sepconv1_act')(x) - x = SeparableConv2D(728, 3, 3, border_mode='same', bias=False, name='block13_sepconv1')(x) + x = SeparableConv2D(728, (3, 3), padding='same', use_bias=False, name='block13_sepconv1')(x) x = BatchNormalization(name='block13_sepconv1_bn')(x) x = Activation('relu', name='block13_sepconv2_act')(x) - x 
= SeparableConv2D(1024, 3, 3, border_mode='same', bias=False, name='block13_sepconv2')(x) + x = SeparableConv2D(1024, (3, 3), padding='same', use_bias=False, name='block13_sepconv2')(x) x = BatchNormalization(name='block13_sepconv2_bn')(x) - x = MaxPooling2D((3, 3), strides=(2, 2), border_mode='same', name='block13_pool')(x) - x = merge([x, residual], mode='sum') + x = MaxPooling2D((3, 3), strides=(2, 2), padding='same', name='block13_pool')(x) + x = layers.add([x, residual]) - x = SeparableConv2D(1536, 3, 3, border_mode='same', bias=False, name='block14_sepconv1')(x) + x = SeparableConv2D(1536, (3, 3), padding='same', use_bias=False, name='block14_sepconv1')(x) x = BatchNormalization(name='block14_sepconv1_bn')(x) x = Activation('relu', name='block14_sepconv1_act')(x) - x = SeparableConv2D(2048, 3, 3, border_mode='same', bias=False, name='block14_sepconv2')(x) + x = SeparableConv2D(2048, (3, 3), padding='same', use_bias=False, name='block14_sepconv2')(x) x = BatchNormalization(name='block14_sepconv2_bn')(x) x = Activation('relu', name='block14_sepconv2_act')(x) if include_top: x = GlobalAveragePooling2D(name='avg_pool')(x) - x = Dense(1000, activation='softmax', name='predictions')(x) - - # Create model - model = Model(img_input, x) + x = Dense(classes, activation='softmax', name='predictions')(x) + else: + if pooling == 'avg': + x = GlobalAveragePooling2D()(x) + elif pooling == 'max': + x = GlobalMaxPooling2D()(x) + + # Ensure that the model takes into account + # any potential predecessors of `input_tensor`. + if input_tensor is not None: + inputs = get_source_inputs(input_tensor) + else: + inputs = img_input + # Create model. 
+ model = Model(inputs, x, name='xception') # load weights if weights == 'imagenet': @@ -200,8 +257,8 @@ def Xception(include_top=True, weights='imagenet', cache_subdir='models') model.load_weights(weights_path) - if old_dim_ordering: - K.set_image_dim_ordering(old_dim_ordering) + if old_data_format: + K.set_image_data_format(old_data_format) return model @@ -223,4 +280,5 @@ def preprocess_input(x): print('Input image shape:', x.shape) preds = model.predict(x) + print(np.argmax(preds)) print('Predicted:', decode_predictions(preds, 1))