From a1d2c2ba3d6dd1792de20542eeb56a015ea83608 Mon Sep 17 00:00:00 2001
From: zsdonghao
Date: Thu, 22 Feb 2018 13:41:54 +0000
Subject: [PATCH] [release] 1.2.0 for TL 1.7.5

---
 README.md | 2 +
 main.py | 116 +-
 model.py | 185 +-
 tensorlayer/__init__.py | 8 +-
 tensorlayer/_logging.py | 7 +
 tensorlayer/activation.py | 141 +-
 tensorlayer/cli/__init__.py | 1 +
 tensorlayer/cli/__main__.py | 13 +
 tensorlayer/cli/train.py | 169 ++
 tensorlayer/cost.py | 776 +++---
 tensorlayer/db.py | 266 +-
 tensorlayer/distributed.py | 294 ++-
 tensorlayer/files.py | 1038 ++++----
 tensorlayer/iterate.py | 266 +-
 tensorlayer/layers/__init__.py | 26 +
 tensorlayer/layers/convolution.py | 1606 ++++++++++++
 tensorlayer/layers/core.py | 1312 ++++++++++
 tensorlayer/layers/extend.py | 77 +
 tensorlayer/layers/flow_control.py | 87 +
 tensorlayer/layers/importer.py | 213 ++
 tensorlayer/layers/merge.py | 130 +
 tensorlayer/layers/normalization.py | 267 ++
 tensorlayer/layers/object_detection.py | 51 +
 tensorlayer/layers/padding.py | 41 +
 tensorlayer/layers/pooling.py | 267 ++
 tensorlayer/layers/recurrent.py | 1601 ++++++++++++
 tensorlayer/layers/shape.py | 129 +
 tensorlayer/layers/spatial_transformer.py | 281 +++
 tensorlayer/layers/special_activation.py | 58 +
 tensorlayer/layers/stack.py | 97 +
 tensorlayer/layers/super_resolution.py | 166 ++
 tensorlayer/layers/time_distribution.py | 78 +
 tensorlayer/nlp.py | 570 +++--
 tensorlayer/prepro.py | 2239 ++++++++++-------
 tensorlayer/rein.py | 69 +-
 .../roi_pooling/roi_pooling_ops.py | 13 +-
 .../roi_pooling/roi_pooling_test.py | 26 +-
 .../roi_pooling/roi_pooling_example.py | 20 +-
 tensorlayer/third_party/roi_pooling/setup.py | 28 +-
 .../third_party/roi_pooling/test_roi_layer.py | 20 +-
 tensorlayer/utils.py | 593 +++--
 tensorlayer/visualize.py | 402 +--
 42 files changed, 10435 insertions(+), 3314 deletions(-)
 create mode 100644 tensorlayer/_logging.py
 create mode 100644 tensorlayer/cli/__init__.py
 create mode 100644 tensorlayer/cli/__main__.py
 create mode 100644 tensorlayer/cli/train.py
 create mode 100644 tensorlayer/layers/__init__.py
 create mode 100644 tensorlayer/layers/convolution.py
 create mode 100644 tensorlayer/layers/core.py
 create mode 100644 tensorlayer/layers/extend.py
 create mode 100644 tensorlayer/layers/flow_control.py
 create mode 100644 tensorlayer/layers/importer.py
 create mode 100644 tensorlayer/layers/merge.py
 create mode 100644 tensorlayer/layers/normalization.py
 create mode 100644 tensorlayer/layers/object_detection.py
 create mode 100644 tensorlayer/layers/padding.py
 create mode 100644 tensorlayer/layers/pooling.py
 create mode 100644 tensorlayer/layers/recurrent.py
 create mode 100644 tensorlayer/layers/shape.py
 create mode 100644 tensorlayer/layers/spatial_transformer.py
 create mode 100644 tensorlayer/layers/special_activation.py
 create mode 100644 tensorlayer/layers/stack.py
 create mode 100644 tensorlayer/layers/super_resolution.py
 create mode 100644 tensorlayer/layers/time_distribution.py

diff --git a/README.md b/README.md
index 9c531c07..e54dffb1 100755
--- a/README.md
+++ b/README.md
@@ -2,7 +2,9 @@
 We run this script under [TensorFlow](https://www.tensorflow.org) 1.2 and the self-contained [TensorLayer](http://tensorlayer.readthedocs.io/en/latest/). If you get an error, you may need to update TensorLayer.
+ ### SRGAN Architecture diff --git a/main.py b/main.py index 0d1ff6f5..c60e67d9 100755 --- a/main.py +++ b/main.py @@ -27,16 +27,6 @@ ni = int(np.sqrt(batch_size)) -def read_all_imgs(img_list, path='', n_threads=32): - """ Returns all images in array by given path and name of each image file. """ - imgs = [] - for idx in range(0, len(img_list), n_threads): - b_imgs_list = img_list[idx : idx + n_threads] - b_imgs = tl.prepro.threading_data(b_imgs_list, fn=get_imgs_fn, path=path) - # print(b_imgs.shape) - imgs.extend(b_imgs) - print('read %d from %s' % (len(imgs), path)) - return imgs def train(): ## create folders to save result images and trained model @@ -54,13 +44,13 @@ def train(): valid_lr_img_list = sorted(tl.files.load_file_list(path=config.VALID.lr_img_path, regx='.*.png', printable=False)) ## If your machine have enough memory, please pre-load the whole train set. - train_hr_imgs = read_all_imgs(train_hr_img_list, path=config.TRAIN.hr_img_path, n_threads=32) + train_hr_imgs = tl.vis.read_images(train_hr_img_list, path=config.TRAIN.hr_img_path, n_threads=32) # for im in train_hr_imgs: # print(im.shape) - # valid_lr_imgs = read_all_imgs(valid_lr_img_list, path=config.VALID.lr_img_path, n_threads=32) + # valid_lr_imgs = tl.vis.read_images(valid_lr_img_list, path=config.VALID.lr_img_path, n_threads=32) # for im in valid_lr_imgs: # print(im.shape) - # valid_hr_imgs = read_all_imgs(valid_hr_img_list, path=config.VALID.hr_img_path, n_threads=32) + # valid_hr_imgs = tl.vis.read_images(valid_hr_img_list, path=config.VALID.hr_img_path, n_threads=32) # for im in valid_hr_imgs: # print(im.shape) # exit() @@ -72,17 +62,19 @@ def train(): net_g = SRGAN_g(t_image, is_train=True, reuse=False) net_d, logits_real = SRGAN_d(t_target_image, is_train=True, reuse=False) - _, logits_fake = SRGAN_d(net_g.outputs, is_train=True, reuse=True) + _, logits_fake = SRGAN_d(net_g.outputs, is_train=True, reuse=True) net_g.print_params(False) net_d.print_params(False) ## vgg inference. 
0, 1, 2, 3 BILINEAR NEAREST BICUBIC AREA - t_target_image_224 = tf.image.resize_images(t_target_image, size=[224, 224], method=0, align_corners=False) # resize_target_image_for_vgg # http://tensorlayer.readthedocs.io/en/latest/_modules/tensorlayer/layers.html#UpSampling2dLayer - t_predict_image_224 = tf.image.resize_images(net_g.outputs, size=[224, 224], method=0, align_corners=False) # resize_generate_image_for_vgg + t_target_image_224 = tf.image.resize_images( + t_target_image, size=[224, 224], method=0, + align_corners=False) # resize_target_image_for_vgg # http://tensorlayer.readthedocs.io/en/latest/_modules/tensorlayer/layers.html#UpSampling2dLayer + t_predict_image_224 = tf.image.resize_images(net_g.outputs, size=[224, 224], method=0, align_corners=False) # resize_generate_image_for_vgg - net_vgg, vgg_target_emb = Vgg19_simple_api((t_target_image_224+1)/2, reuse=False) - _, vgg_predict_emb = Vgg19_simple_api((t_predict_image_224+1)/2, reuse=True) + net_vgg, vgg_target_emb = Vgg19_simple_api((t_target_image_224 + 1) / 2, reuse=False) + _, vgg_predict_emb = Vgg19_simple_api((t_predict_image_224 + 1) / 2, reuse=True) ## test inference net_g_test = SRGAN_g(t_image, is_train=False, reuse=True) @@ -93,7 +85,7 @@ def train(): d_loss = d_loss1 + d_loss2 g_gan_loss = 1e-3 * tl.cost.sigmoid_cross_entropy(logits_fake, tf.ones_like(logits_fake), name='g') - mse_loss = tl.cost.mean_squared_error(net_g.outputs , t_target_image, is_mean=True) + mse_loss = tl.cost.mean_squared_error(net_g.outputs, t_target_image, is_mean=True) vgg_loss = 2e-6 * tl.cost.mean_squared_error(vgg_predict_emb.outputs, vgg_target_emb.outputs, is_mean=True) g_loss = mse_loss + vgg_loss + g_gan_loss @@ -112,9 +104,9 @@ def train(): ###========================== RESTORE MODEL =============================### sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False)) tl.layers.initialize_global_variables(sess) - if tl.files.load_and_assign_npz(sess=sess, name=checkpoint_dir+'/g_{}.npz'.format(tl.global_flag['mode']), network=net_g) is False: - tl.files.load_and_assign_npz(sess=sess, name=checkpoint_dir+'/g_{}_init.npz'.format(tl.global_flag['mode']), network=net_g) - tl.files.load_and_assign_npz(sess=sess, name=checkpoint_dir+'/d_{}.npz'.format(tl.global_flag['mode']), network=net_d) + if tl.files.load_and_assign_npz(sess=sess, name=checkpoint_dir + '/g_{}.npz'.format(tl.global_flag['mode']), network=net_g) is False: + tl.files.load_and_assign_npz(sess=sess, name=checkpoint_dir + '/g_{}_init.npz'.format(tl.global_flag['mode']), network=net_g) + tl.files.load_and_assign_npz(sess=sess, name=checkpoint_dir + '/d_{}.npz'.format(tl.global_flag['mode']), network=net_d) ###============================= LOAD VGG ===============================### vgg19_npy_path = "vgg19.npy" @@ -124,7 +116,7 @@ def train(): npz = np.load(vgg19_npy_path, encoding='latin1').item() params = [] - for val in sorted( npz.items() ): + for val in sorted(npz.items()): W = np.asarray(val[1][0]) b = np.asarray(val[1][1]) print(" Loading %s: %s, %s" % (val[0], W.shape, b.shape)) @@ -136,21 +128,21 @@ def train(): ###============================= TRAINING ===============================### ## use first `batch_size` of train set to have a quick test during training sample_imgs = train_hr_imgs[0:batch_size] - # sample_imgs = read_all_imgs(train_hr_img_list[0:batch_size], path=config.TRAIN.hr_img_path, n_threads=32) # if no pre-load train set + # sample_imgs = tl.vis.read_images(train_hr_img_list[0:batch_size], 
path=config.TRAIN.hr_img_path, n_threads=32) # if no pre-load train set sample_imgs_384 = tl.prepro.threading_data(sample_imgs, fn=crop_sub_imgs_fn, is_random=False) - print('sample HR sub-image:',sample_imgs_384.shape, sample_imgs_384.min(), sample_imgs_384.max()) + print('sample HR sub-image:', sample_imgs_384.shape, sample_imgs_384.min(), sample_imgs_384.max()) sample_imgs_96 = tl.prepro.threading_data(sample_imgs_384, fn=downsample_fn) print('sample LR sub-image:', sample_imgs_96.shape, sample_imgs_96.min(), sample_imgs_96.max()) - tl.vis.save_images(sample_imgs_96, [ni, ni], save_dir_ginit+'/_train_sample_96.png') - tl.vis.save_images(sample_imgs_384, [ni, ni], save_dir_ginit+'/_train_sample_384.png') - tl.vis.save_images(sample_imgs_96, [ni, ni], save_dir_gan+'/_train_sample_96.png') - tl.vis.save_images(sample_imgs_384, [ni, ni], save_dir_gan+'/_train_sample_384.png') + tl.vis.save_images(sample_imgs_96, [ni, ni], save_dir_ginit + '/_train_sample_96.png') + tl.vis.save_images(sample_imgs_384, [ni, ni], save_dir_ginit + '/_train_sample_384.png') + tl.vis.save_images(sample_imgs_96, [ni, ni], save_dir_gan + '/_train_sample_96.png') + tl.vis.save_images(sample_imgs_384, [ni, ni], save_dir_gan + '/_train_sample_384.png') ###========================= initialize G ====================### ## fixed learning rate sess.run(tf.assign(lr_v, lr_init)) print(" ** fixed learning rate: %f (for init G)" % lr_init) - for epoch in range(0, n_epoch_init+1): + for epoch in range(0, n_epoch_init + 1): epoch_time = time.time() total_mse_loss, n_iter = 0, 0 @@ -167,33 +159,31 @@ def train(): ## If your machine have enough memory, please pre-load the whole train set. for idx in range(0, len(train_hr_imgs), batch_size): step_time = time.time() - b_imgs_384 = tl.prepro.threading_data( - train_hr_imgs[idx : idx + batch_size], - fn=crop_sub_imgs_fn, is_random=True) + b_imgs_384 = tl.prepro.threading_data(train_hr_imgs[idx:idx + batch_size], fn=crop_sub_imgs_fn, is_random=True) b_imgs_96 = tl.prepro.threading_data(b_imgs_384, fn=downsample_fn) ## update G errM, _ = sess.run([mse_loss, g_optim_init], {t_image: b_imgs_96, t_target_image: b_imgs_384}) print("Epoch [%2d/%2d] %4d time: %4.4fs, mse: %.8f " % (epoch, n_epoch_init, n_iter, time.time() - step_time, errM)) total_mse_loss += errM n_iter += 1 - log = "[*] Epoch: [%2d/%2d] time: %4.4fs, mse: %.8f" % (epoch, n_epoch_init, time.time() - epoch_time, total_mse_loss/n_iter) + log = "[*] Epoch: [%2d/%2d] time: %4.4fs, mse: %.8f" % (epoch, n_epoch_init, time.time() - epoch_time, total_mse_loss / n_iter) print(log) ## quick evaluation on train set if (epoch != 0) and (epoch % 10 == 0): - out = sess.run(net_g_test.outputs, {t_image: sample_imgs_96})#; print('gen sub-image:', out.shape, out.min(), out.max()) + out = sess.run(net_g_test.outputs, {t_image: sample_imgs_96}) #; print('gen sub-image:', out.shape, out.min(), out.max()) print("[*] save images") - tl.vis.save_images(out, [ni, ni], save_dir_ginit+'/train_%d.png' % epoch) + tl.vis.save_images(out, [ni, ni], save_dir_ginit + '/train_%d.png' % epoch) ## save model if (epoch != 0) and (epoch % 10 == 0): - tl.files.save_npz(net_g.all_params, name=checkpoint_dir+'/g_{}_init.npz'.format(tl.global_flag['mode']), sess=sess) + tl.files.save_npz(net_g.all_params, name=checkpoint_dir + '/g_{}_init.npz'.format(tl.global_flag['mode']), sess=sess) ###========================= train GAN (SRGAN) =========================### - for epoch in range(0, n_epoch+1): + for epoch in range(0, n_epoch + 1): ## update learning rate - if 
epoch !=0 and (epoch % decay_every == 0): - new_lr_decay = lr_decay ** (epoch // decay_every) + if epoch != 0 and (epoch % decay_every == 0): + new_lr_decay = lr_decay**(epoch // decay_every) sess.run(tf.assign(lr_v, lr_init * new_lr_decay)) log = " ** new learning rate: %f (for GAN)" % (lr_init * new_lr_decay) print(log) @@ -218,32 +208,33 @@ def train(): ## If your machine have enough memory, please pre-load the whole train set. for idx in range(0, len(train_hr_imgs), batch_size): step_time = time.time() - b_imgs_384 = tl.prepro.threading_data( - train_hr_imgs[idx : idx + batch_size], - fn=crop_sub_imgs_fn, is_random=True) + b_imgs_384 = tl.prepro.threading_data(train_hr_imgs[idx:idx + batch_size], fn=crop_sub_imgs_fn, is_random=True) b_imgs_96 = tl.prepro.threading_data(b_imgs_384, fn=downsample_fn) ## update D errD, _ = sess.run([d_loss, d_optim], {t_image: b_imgs_96, t_target_image: b_imgs_384}) ## update G errG, errM, errV, errA, _ = sess.run([g_loss, mse_loss, vgg_loss, g_gan_loss, g_optim], {t_image: b_imgs_96, t_target_image: b_imgs_384}) - print("Epoch [%2d/%2d] %4d time: %4.4fs, d_loss: %.8f g_loss: %.8f (mse: %.6f vgg: %.6f adv: %.6f)" % (epoch, n_epoch, n_iter, time.time() - step_time, errD, errG, errM, errV, errA)) + print("Epoch [%2d/%2d] %4d time: %4.4fs, d_loss: %.8f g_loss: %.8f (mse: %.6f vgg: %.6f adv: %.6f)" % + (epoch, n_epoch, n_iter, time.time() - step_time, errD, errG, errM, errV, errA)) total_d_loss += errD total_g_loss += errG n_iter += 1 - log = "[*] Epoch: [%2d/%2d] time: %4.4fs, d_loss: %.8f g_loss: %.8f" % (epoch, n_epoch, time.time() - epoch_time, total_d_loss/n_iter, total_g_loss/n_iter) + log = "[*] Epoch: [%2d/%2d] time: %4.4fs, d_loss: %.8f g_loss: %.8f" % (epoch, n_epoch, time.time() - epoch_time, total_d_loss / n_iter, + total_g_loss / n_iter) print(log) ## quick evaluation on train set if (epoch != 0) and (epoch % 10 == 0): - out = sess.run(net_g_test.outputs, {t_image: sample_imgs_96})#; print('gen sub-image:', out.shape, out.min(), out.max()) + out = sess.run(net_g_test.outputs, {t_image: sample_imgs_96}) #; print('gen sub-image:', out.shape, out.min(), out.max()) print("[*] save images") - tl.vis.save_images(out, [ni, ni], save_dir_gan+'/train_%d.png' % epoch) + tl.vis.save_images(out, [ni, ni], save_dir_gan + '/train_%d.png' % epoch) ## save model if (epoch != 0) and (epoch % 10 == 0): - tl.files.save_npz(net_g.all_params, name=checkpoint_dir+'/g_{}.npz'.format(tl.global_flag['mode']), sess=sess) - tl.files.save_npz(net_d.all_params, name=checkpoint_dir+'/d_{}.npz'.format(tl.global_flag['mode']), sess=sess) + tl.files.save_npz(net_g.all_params, name=checkpoint_dir + '/g_{}.npz'.format(tl.global_flag['mode']), sess=sess) + tl.files.save_npz(net_d.all_params, name=checkpoint_dir + '/d_{}.npz'.format(tl.global_flag['mode']), sess=sess) + def evaluate(): ## create folders to save result images @@ -258,23 +249,23 @@ def evaluate(): valid_lr_img_list = sorted(tl.files.load_file_list(path=config.VALID.lr_img_path, regx='.*.png', printable=False)) ## If your machine have enough memory, please pre-load the whole train set. 
-    # train_hr_imgs = read_all_imgs(train_hr_img_list, path=config.TRAIN.hr_img_path, n_threads=32)
+    # train_hr_imgs = tl.vis.read_images(train_hr_img_list, path=config.TRAIN.hr_img_path, n_threads=32)
     # for im in train_hr_imgs:
     #     print(im.shape)
-    valid_lr_imgs = read_all_imgs(valid_lr_img_list, path=config.VALID.lr_img_path, n_threads=32)
+    valid_lr_imgs = tl.vis.read_images(valid_lr_img_list, path=config.VALID.lr_img_path, n_threads=32)
     # for im in valid_lr_imgs:
     #     print(im.shape)
-    valid_hr_imgs = read_all_imgs(valid_hr_img_list, path=config.VALID.hr_img_path, n_threads=32)
+    valid_hr_imgs = tl.vis.read_images(valid_hr_img_list, path=config.VALID.hr_img_path, n_threads=32)
     # for im in valid_hr_imgs:
     #     print(im.shape)
     # exit()

     ###========================== DEFINE MODEL ============================###
-    imid = 64  # 0: penguin  81: butterfly  53: bird  64: castle
+    imid = 64  # 0: penguin  81: butterfly  53: bird  64: castle
     valid_lr_img = valid_lr_imgs[imid]
     valid_hr_img = valid_hr_imgs[imid]
-    # valid_lr_img = get_imgs_fn('test.png', 'data2017/')  # if you want to test your own image
-    valid_lr_img = (valid_lr_img / 127.5) - 1  # rescale to [-1, 1]
+    # valid_lr_img = get_imgs_fn('test.png', 'data2017/')  # if you want to test your own image
+    valid_lr_img = (valid_lr_img / 127.5) - 1  # rescale to [-1, 1]
     # print(valid_lr_img.min(), valid_lr_img.max())

     size = valid_lr_img.shape
@@ -286,21 +277,22 @@ def evaluate():
     ###========================== RESTORE G =============================###
     sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True, log_device_placement=False))
     tl.layers.initialize_global_variables(sess)
-    tl.files.load_and_assign_npz(sess=sess, name=checkpoint_dir+'/g_srgan.npz', network=net_g)
+    tl.files.load_and_assign_npz(sess=sess, name=checkpoint_dir + '/g_srgan.npz', network=net_g)

     ###======================= EVALUATION =============================###
     start_time = time.time()
     out = sess.run(net_g.outputs, {t_image: [valid_lr_img]})
     print("took: %4.4fs" % (time.time() - start_time))

-    print("LR size: %s /  generated HR size: %s" % (size, out.shape))  # LR size: (339, 510, 3) /  gen HR size: (1, 1356, 2040, 3)
+    print("LR size: %s /  generated HR size: %s" % (size, out.shape))  # LR size: (339, 510, 3) /  gen HR size: (1, 1356, 2040, 3)
     print("[*] save images")
-    tl.vis.save_image(out[0], save_dir+'/valid_gen.png')
-    tl.vis.save_image(valid_lr_img, save_dir+'/valid_lr.png')
-    tl.vis.save_image(valid_hr_img, save_dir+'/valid_hr.png')
+    tl.vis.save_image(out[0], save_dir + '/valid_gen.png')
+    tl.vis.save_image(valid_lr_img, save_dir + '/valid_lr.png')
+    tl.vis.save_image(valid_hr_img, save_dir + '/valid_hr.png')
+
+    out_bicu = scipy.misc.imresize(valid_lr_img, [size[0] * 4, size[1] * 4], interp='bicubic', mode=None)
+    tl.vis.save_image(out_bicu, save_dir + '/valid_bicubic.png')

-    out_bicu = scipy.misc.imresize(valid_lr_img, [size[0]*4, size[1]*4], interp='bicubic', mode=None)
-    tl.vis.save_image(out_bicu, save_dir+'/valid_bicubic.png')

 if __name__ == '__main__':
     import argparse
diff --git a/model.py b/model.py
index 2b9c20e9..c4a5705d 100755
--- a/model.py
+++ b/model.py
@@ -4,6 +4,7 @@
 import tensorflow as tf
 import tensorlayer as tl
 from tensorlayer.layers import *
+
 # from tensorflow.python.ops import variable_scope as vs
 # from tensorflow.python.ops import math_ops, init_ops, array_ops, nn
 # from tensorflow.python.util import nest
@@ -11,12 +12,13 @@
 # https://github.com/david-gpu/srez/blob/master/srez_model.py

+
 def SRGAN_g(t_image, is_train=False, reuse=False):
     """ Generator in Photo-Realistic Single Image
Super-Resolution Using a Generative Adversarial Network feature maps (n) and stride (s) feature maps (n) and stride (s) """ w_init = tf.random_normal_initializer(stddev=0.02) - b_init = None # tf.constant_initializer(value=0.0) + b_init = None # tf.constant_initializer(value=0.0) g_init = tf.random_normal_initializer(1., 0.02) with tf.variable_scope("SRGAN_g", reuse=reuse) as vs: tl.layers.set_name_reuse(reuse) @@ -57,11 +59,9 @@ def SRGAN_g2(t_image, is_train=False, reuse=False): Use Resize Conv """ w_init = tf.random_normal_initializer(stddev=0.02) - b_init = None # tf.constant_initializer(value=0.0) + b_init = None # tf.constant_initializer(value=0.0) g_init = tf.random_normal_initializer(1., 0.02) - size = t_image.get_shape().as_list() - with tf.variable_scope("SRGAN_g", reuse=reuse) as vs: tl.layers.set_name_reuse(reuse) n = InputLayer(t_image, name='in') @@ -89,17 +89,13 @@ def SRGAN_g2(t_image, is_train=False, reuse=False): # n = SubpixelConv2d(n, scale=2, n_out_channel=None, act=tf.nn.relu, name='pixelshufflerx2/2') ## 0, 1, 2, 3 BILINEAR NEAREST BICUBIC AREA - n = UpSampling2dLayer(n, size=[size[1]*2, size[2]*2], is_scale=False, method=1, align_corners=False, name='up1/upsample2d') - n = Conv2d(n, 64, (3, 3), (1, 1), - padding='SAME', W_init=w_init, b_init=b_init, name='up1/conv2d') # <-- may need to increase n_filter - n = BatchNormLayer(n, act=tf.nn.relu, - is_train=is_train, gamma_init=g_init, name='up1/batch_norm') - - n = UpSampling2dLayer(n, size=[size[1]*4, size[2]*4], is_scale=False, method=1, align_corners=False, name='up2/upsample2d') - n = Conv2d(n, 32, (3, 3), (1, 1), - padding='SAME', W_init=w_init, b_init=b_init, name='up2/conv2d') # <-- may need to increase n_filter - n = BatchNormLayer(n, act=tf.nn.relu, - is_train=is_train, gamma_init=g_init, name='up2/batch_norm') + n = UpSampling2dLayer(n, size=[size[1] * 2, size[2] * 2], is_scale=False, method=1, align_corners=False, name='up1/upsample2d') + n = Conv2d(n, 64, (3, 3), (1, 1), padding='SAME', W_init=w_init, b_init=b_init, name='up1/conv2d') # <-- may need to increase n_filter + n = BatchNormLayer(n, act=tf.nn.relu, is_train=is_train, gamma_init=g_init, name='up1/batch_norm') + + n = UpSampling2dLayer(n, size=[size[1] * 4, size[2] * 4], is_scale=False, method=1, align_corners=False, name='up2/upsample2d') + n = Conv2d(n, 32, (3, 3), (1, 1), padding='SAME', W_init=w_init, b_init=b_init, name='up2/conv2d') # <-- may need to increase n_filter + n = BatchNormLayer(n, act=tf.nn.relu, is_train=is_train, gamma_init=g_init, name='up2/batch_norm') n = Conv2d(n, 3, (1, 1), (1, 1), act=tf.nn.tanh, padding='SAME', W_init=w_init, name='out') return n @@ -112,7 +108,7 @@ def SRGAN_d2(t_image, is_train=False, reuse=False): w_init = tf.random_normal_initializer(stddev=0.02) b_init = None g_init = tf.random_normal_initializer(1., 0.02) - lrelu = lambda x : tl.act.lrelu(x, 0.2) + lrelu = lambda x: tl.act.lrelu(x, 0.2) with tf.variable_scope("SRGAN_d", reuse=reuse) as vs: tl.layers.set_name_reuse(reuse) n = InputLayer(t_image, name='in') @@ -148,71 +144,50 @@ def SRGAN_d2(t_image, is_train=False, reuse=False): return n, logits + def SRGAN_d(input_images, is_train=True, reuse=False): w_init = tf.random_normal_initializer(stddev=0.02) - b_init = None # tf.constant_initializer(value=0.0) - gamma_init=tf.random_normal_initializer(1., 0.02) + b_init = None # tf.constant_initializer(value=0.0) + gamma_init = tf.random_normal_initializer(1., 0.02) df_dim = 64 lrelu = lambda x: tl.act.lrelu(x, 0.2) with tf.variable_scope("SRGAN_d", 
reuse=reuse): tl.layers.set_name_reuse(reuse) net_in = InputLayer(input_images, name='input/images') - net_h0 = Conv2d(net_in, df_dim, (4, 4), (2, 2), act=lrelu, - padding='SAME', W_init=w_init, name='h0/c') - - net_h1 = Conv2d(net_h0, df_dim*2, (4, 4), (2, 2), act=None, - padding='SAME', W_init=w_init, b_init=b_init, name='h1/c') - net_h1 = BatchNormLayer(net_h1, act=lrelu, is_train=is_train, - gamma_init=gamma_init, name='h1/bn') - net_h2 = Conv2d(net_h1, df_dim*4, (4, 4), (2, 2), act=None, - padding='SAME', W_init=w_init, b_init=b_init, name='h2/c') - net_h2 = BatchNormLayer(net_h2, act=lrelu, is_train=is_train, - gamma_init=gamma_init, name='h2/bn') - net_h3 = Conv2d(net_h2, df_dim*8, (4, 4), (2, 2), act=None, - padding='SAME', W_init=w_init, b_init=b_init, name='h3/c') - net_h3 = BatchNormLayer(net_h3, act=lrelu, is_train=is_train, - gamma_init=gamma_init, name='h3/bn') - net_h4 = Conv2d(net_h3, df_dim*16, (4, 4), (2, 2), act=None, - padding='SAME', W_init=w_init, b_init=b_init, name='h4/c') - net_h4 = BatchNormLayer(net_h4, act=lrelu, is_train=is_train, - gamma_init=gamma_init, name='h4/bn') - net_h5 = Conv2d(net_h4, df_dim*32, (4, 4), (2, 2), act=None, - padding='SAME', W_init=w_init, b_init=b_init, name='h5/c') - net_h5 = BatchNormLayer(net_h5, act=lrelu, is_train=is_train, - gamma_init=gamma_init, name='h5/bn') - net_h6 = Conv2d(net_h5, df_dim*16, (1, 1), (1, 1), act=None, - padding='SAME', W_init=w_init, b_init=b_init, name='h6/c') - net_h6 = BatchNormLayer(net_h6, act=lrelu, is_train=is_train, - gamma_init=gamma_init, name='h6/bn') - net_h7 = Conv2d(net_h6, df_dim*8, (1, 1), (1, 1), act=None, - padding='SAME', W_init=w_init, b_init=b_init, name='h7/c') - net_h7 = BatchNormLayer(net_h7, is_train=is_train, - gamma_init=gamma_init, name='h7/bn') - - net = Conv2d(net_h7, df_dim*2, (1, 1), (1, 1), act=None, - padding='SAME', W_init=w_init, b_init=b_init, name='res/c') - net = BatchNormLayer(net, act=lrelu, is_train=is_train, - gamma_init=gamma_init, name='res/bn') - net = Conv2d(net, df_dim*2, (3, 3), (1, 1), act=None, - padding='SAME', W_init=w_init, b_init=b_init, name='res/c2') - net = BatchNormLayer(net, act=lrelu, is_train=is_train, - gamma_init=gamma_init, name='res/bn2') - net = Conv2d(net, df_dim*8, (3, 3), (1, 1), act=None, - padding='SAME', W_init=w_init, b_init=b_init, name='res/c3') - net = BatchNormLayer(net, is_train=is_train, - gamma_init=gamma_init, name='res/bn3') - net_h8 = ElementwiseLayer(layer=[net_h7, net], - combine_fn=tf.add, name='res/add') + net_h0 = Conv2d(net_in, df_dim, (4, 4), (2, 2), act=lrelu, padding='SAME', W_init=w_init, name='h0/c') + + net_h1 = Conv2d(net_h0, df_dim * 2, (4, 4), (2, 2), act=None, padding='SAME', W_init=w_init, b_init=b_init, name='h1/c') + net_h1 = BatchNormLayer(net_h1, act=lrelu, is_train=is_train, gamma_init=gamma_init, name='h1/bn') + net_h2 = Conv2d(net_h1, df_dim * 4, (4, 4), (2, 2), act=None, padding='SAME', W_init=w_init, b_init=b_init, name='h2/c') + net_h2 = BatchNormLayer(net_h2, act=lrelu, is_train=is_train, gamma_init=gamma_init, name='h2/bn') + net_h3 = Conv2d(net_h2, df_dim * 8, (4, 4), (2, 2), act=None, padding='SAME', W_init=w_init, b_init=b_init, name='h3/c') + net_h3 = BatchNormLayer(net_h3, act=lrelu, is_train=is_train, gamma_init=gamma_init, name='h3/bn') + net_h4 = Conv2d(net_h3, df_dim * 16, (4, 4), (2, 2), act=None, padding='SAME', W_init=w_init, b_init=b_init, name='h4/c') + net_h4 = BatchNormLayer(net_h4, act=lrelu, is_train=is_train, gamma_init=gamma_init, name='h4/bn') + net_h5 = Conv2d(net_h4, df_dim * 
32, (4, 4), (2, 2), act=None, padding='SAME', W_init=w_init, b_init=b_init, name='h5/c') + net_h5 = BatchNormLayer(net_h5, act=lrelu, is_train=is_train, gamma_init=gamma_init, name='h5/bn') + net_h6 = Conv2d(net_h5, df_dim * 16, (1, 1), (1, 1), act=None, padding='SAME', W_init=w_init, b_init=b_init, name='h6/c') + net_h6 = BatchNormLayer(net_h6, act=lrelu, is_train=is_train, gamma_init=gamma_init, name='h6/bn') + net_h7 = Conv2d(net_h6, df_dim * 8, (1, 1), (1, 1), act=None, padding='SAME', W_init=w_init, b_init=b_init, name='h7/c') + net_h7 = BatchNormLayer(net_h7, is_train=is_train, gamma_init=gamma_init, name='h7/bn') + + net = Conv2d(net_h7, df_dim * 2, (1, 1), (1, 1), act=None, padding='SAME', W_init=w_init, b_init=b_init, name='res/c') + net = BatchNormLayer(net, act=lrelu, is_train=is_train, gamma_init=gamma_init, name='res/bn') + net = Conv2d(net, df_dim * 2, (3, 3), (1, 1), act=None, padding='SAME', W_init=w_init, b_init=b_init, name='res/c2') + net = BatchNormLayer(net, act=lrelu, is_train=is_train, gamma_init=gamma_init, name='res/bn2') + net = Conv2d(net, df_dim * 8, (3, 3), (1, 1), act=None, padding='SAME', W_init=w_init, b_init=b_init, name='res/c3') + net = BatchNormLayer(net, is_train=is_train, gamma_init=gamma_init, name='res/bn3') + net_h8 = ElementwiseLayer([net_h7, net], combine_fn=tf.add, name='res/add') net_h8.outputs = tl.act.lrelu(net_h8.outputs, 0.2) net_ho = FlattenLayer(net_h8, name='ho/flatten') - net_ho = DenseLayer(net_ho, n_units=1, act=tf.identity, - W_init = w_init, name='ho/dense') + net_ho = DenseLayer(net_ho, n_units=1, act=tf.identity, W_init=w_init, name='ho/dense') logits = net_ho.outputs net_ho.outputs = tf.nn.sigmoid(net_ho.outputs) return net_ho, logits + def Vgg19_simple_api(rgb, reuse): """ Build the VGG 19 Model @@ -229,7 +204,7 @@ def Vgg19_simple_api(rgb, reuse): # Convert RGB to BGR if tf.__version__ <= '0.11': red, green, blue = tf.split(3, 3, rgb_scaled) - else: # TF 1.0 + else: # TF 1.0 # print(rgb_scaled) red, green, blue = tf.split(rgb_scaled, 3, 3) assert red.get_shape().as_list()[1:] == [224, 224, 1] @@ -242,63 +217,42 @@ def Vgg19_simple_api(rgb, reuse): red - VGG_MEAN[2], ]) else: - bgr = tf.concat([ - blue - VGG_MEAN[0], - green - VGG_MEAN[1], - red - VGG_MEAN[2], - ], axis=3) + bgr = tf.concat( + [ + blue - VGG_MEAN[0], + green - VGG_MEAN[1], + red - VGG_MEAN[2], + ], axis=3) assert bgr.get_shape().as_list()[1:] == [224, 224, 3] - """ input layer """ net_in = InputLayer(bgr, name='input') """ conv1 """ - network = Conv2d(net_in, n_filter=64, filter_size=(3, 3), - strides=(1, 1), act=tf.nn.relu,padding='SAME', name='conv1_1') - network = Conv2d(network, n_filter=64, filter_size=(3, 3), - strides=(1, 1), act=tf.nn.relu,padding='SAME', name='conv1_2') - network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), - padding='SAME', name='pool1') + network = Conv2d(net_in, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_1') + network = Conv2d(network, n_filter=64, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv1_2') + network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool1') """ conv2 """ - network = Conv2d(network, n_filter=128, filter_size=(3, 3), - strides=(1, 1), act=tf.nn.relu,padding='SAME', name='conv2_1') - network = Conv2d(network, n_filter=128, filter_size=(3, 3), - strides=(1, 1), act=tf.nn.relu,padding='SAME', name='conv2_2') - network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), - padding='SAME', 
name='pool2') + network = Conv2d(network, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_1') + network = Conv2d(network, n_filter=128, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv2_2') + network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool2') """ conv3 """ - network = Conv2d(network, n_filter=256, filter_size=(3, 3), - strides=(1, 1), act=tf.nn.relu,padding='SAME', name='conv3_1') - network = Conv2d(network, n_filter=256, filter_size=(3, 3), - strides=(1, 1), act=tf.nn.relu,padding='SAME', name='conv3_2') - network = Conv2d(network, n_filter=256, filter_size=(3, 3), - strides=(1, 1), act=tf.nn.relu,padding='SAME', name='conv3_3') - network = Conv2d(network, n_filter=256, filter_size=(3, 3), - strides=(1, 1), act=tf.nn.relu,padding='SAME', name='conv3_4') - network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), - padding='SAME', name='pool3') + network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_1') + network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_2') + network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_3') + network = Conv2d(network, n_filter=256, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv3_4') + network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool3') """ conv4 """ - network = Conv2d(network, n_filter=512, filter_size=(3, 3), - strides=(1, 1), act=tf.nn.relu,padding='SAME', name='conv4_1') - network = Conv2d(network, n_filter=512, filter_size=(3, 3), - strides=(1, 1), act=tf.nn.relu,padding='SAME', name='conv4_2') - network = Conv2d(network, n_filter=512, filter_size=(3, 3), - strides=(1, 1), act=tf.nn.relu,padding='SAME', name='conv4_3') - network = Conv2d(network, n_filter=512, filter_size=(3, 3), - strides=(1, 1), act=tf.nn.relu,padding='SAME', name='conv4_4') - network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), - padding='SAME', name='pool4') # (batch_size, 14, 14, 512) + network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_1') + network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_2') + network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_3') + network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv4_4') + network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool4') # (batch_size, 14, 14, 512) conv = network """ conv5 """ - network = Conv2d(network, n_filter=512, filter_size=(3, 3), - strides=(1, 1), act=tf.nn.relu,padding='SAME', name='conv5_1') - network = Conv2d(network, n_filter=512, filter_size=(3, 3), - strides=(1, 1), act=tf.nn.relu,padding='SAME', name='conv5_2') - network = Conv2d(network, n_filter=512, filter_size=(3, 3), - strides=(1, 1), act=tf.nn.relu,padding='SAME', name='conv5_3') - network = Conv2d(network, n_filter=512, filter_size=(3, 3), - strides=(1, 1), act=tf.nn.relu,padding='SAME', name='conv5_4') - network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), - padding='SAME', name='pool5') # (batch_size, 7, 7, 512) + network = Conv2d(network, 
n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_1')
+    network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_2')
+    network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_3')
+    network = Conv2d(network, n_filter=512, filter_size=(3, 3), strides=(1, 1), act=tf.nn.relu, padding='SAME', name='conv5_4')
+    network = MaxPool2d(network, filter_size=(2, 2), strides=(2, 2), padding='SAME', name='pool5')  # (batch_size, 7, 7, 512)
     """ fc 6~8 """
     network = FlattenLayer(network, name='flatten')
     network = DenseLayer(network, n_units=4096, act=tf.nn.relu, name='fc6')
@@ -307,6 +261,7 @@ def Vgg19_simple_api(rgb, reuse):
     print("build model finished: %fs" % (time.time() - start_time))
     return network, conv

+
 # def vgg16_cnn_emb(t_image, reuse=False):
 #     """ t_image = 244x244 [0~255] """
 #     with tf.variable_scope("vgg16_cnn", reuse=reuse) as vs:
diff --git a/tensorlayer/__init__.py b/tensorlayer/__init__.py
index 0a45da2e..000ed8df 100644
--- a/tensorlayer/__init__.py
+++ b/tensorlayer/__init__.py
@@ -1,9 +1,6 @@
-"""
-Deep learning and Reinforcement learning library for Researchers and Engineers
-"""
+"""Deep learning and Reinforcement learning library for Researchers and Engineers"""
 from __future__ import absolute_import
-
 try:
     install_instr = "Please make sure you install a recent enough version of TensorFlow."
     import tensorflow
@@ -15,7 +12,6 @@
 from . import files
 from . import iterate
 from . import layers
-from . import ops
 from . import utils
 from . import visualize
 from . import prepro
@@ -27,7 +23,7 @@
 act = activation
 vis = visualize

-__version__ = "1.7.3"
+__version__ = "1.7.4"

 global_flag = {}
 global_dict = {}
diff --git a/tensorlayer/_logging.py b/tensorlayer/_logging.py
new file mode 100644
index 00000000..6e8f11da
--- /dev/null
+++ b/tensorlayer/_logging.py
@@ -0,0 +1,7 @@
+import logging
+
+logging.basicConfig(level=logging.INFO, format='[TL] %(message)s')
+
+
+def info(fmt, *args):
+    logging.info(fmt, *args)
diff --git a/tensorlayer/activation.py b/tensorlayer/activation.py
index a5e38360..e1cc9f94 100644
--- a/tensorlayer/activation.py
+++ b/tensorlayer/activation.py
@@ -3,120 +3,139 @@

 import tensorflow as tf

-def identity(x, name=None):
-    """The identity activation function, Shortcut is ``linear``.
+
+def identity(x):
+    """The identity activation function.
+    Shortcut is ``linear``.

     Parameters
     ----------
-    x : a tensor input
-        input(s)
+    x : Tensor
+        input.

     Returns
-    --------
-    A `Tensor` with the same type as `x`.
+    -------
+    Tensor
+        A ``Tensor`` of the same type as ``x``.
+
     """
     return x

-# Shortcut
-linear = identity

-def ramp(x=None, v_min=0, v_max=1, name=None):
+def ramp(x, v_min=0, v_max=1, name=None):
     """The ramp activation function.

     Parameters
     ----------
-    x : a tensor input
-        input(s)
+    x : Tensor
+        input.
     v_min : float
-        if input(s) smaller than v_min, change inputs to v_min
+        cap input to v_min as a lower bound.
     v_max : float
-        if input(s) greater than v_max, change inputs to v_max
-    name : a string or None
-        An optional name to attach to this activation function.
+        cap input to v_max as an upper bound.
+    name : str
+        The function name (optional).

     Returns
-    --------
-    A `Tensor` with the same type as `x`.
+    -------
+    Tensor
+        A ``Tensor`` of the same type as ``x``.
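+
+    Examples
+    --------
+    A minimal sketch, clipping activations into ``[0, 1]`` (the placeholder shape is hypothetical):
+
+    >>> x = tf.placeholder(tf.float32, [None, 100])
+    >>> y = tl.act.ramp(x, v_min=0, v_max=1)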
+ """ return tf.clip_by_value(x, clip_value_min=v_min, clip_value_max=v_max, name=name) -def leaky_relu(x=None, alpha=0.1, name="lrelu"): + +def leaky_relu(x, alpha=0.1, name="lrelu"): """The LeakyReLU, Shortcut is ``lrelu``. - Modified version of ReLU, introducing a nonzero gradient for negative - input. + Modified version of ReLU, introducing a nonzero gradient for negative input. Parameters ---------- - x : A `Tensor` with type `float`, `double`, `int32`, `int64`, `uint8`, - `int16`, or `int8`. - alpha : `float`. slope. - name : a string or None - An optional name to attach to this activation function. + x : Tensor + Support input type ``float``, ``double``, ``int32``, ``int64``, ``uint8``, + ``int16``, or ``int8``. + alpha : float + Slope. + name : str + The function name (optional). Examples - --------- - >>> network = tl.layers.DenseLayer(network, n_units=100, name = 'dense_lrelu', - ... act= lambda x : tl.act.lrelu(x, 0.2)) + -------- + >>> net = tl.layers.DenseLayer(net, 100, act=lambda x : tl.act.lrelu(x, 0.2), name='dense') + + Returns + ------- + Tensor + A ``Tensor`` in the same type as ``x``. References ------------ - - `Rectifier Nonlinearities Improve Neural Network Acoustic Models, Maas et al. (2013) `_ + - `Rectifier Nonlinearities Improve Neural Network Acoustic Models, Maas et al. (2013) `__ + """ # with tf.name_scope(name) as scope: - # x = tf.nn.relu(x) - # m_x = tf.nn.relu(-x) - # x -= alpha * m_x + # x = tf.nn.relu(x) + # m_x = tf.nn.relu(-x) + # x -= alpha * m_x x = tf.maximum(x, alpha * x, name=name) return x -#Shortcut -lrelu = leaky_relu - def swish(x, name='swish'): - """The Swish function, see `Swish: a Self-Gated Activation Function `_. + """The Swish function. + See `Swish: a Self-Gated Activation Function `__. Parameters ---------- - x : a tensor input - input(s) + x : Tensor + input. + name: str + function name (optional). Returns - -------- - A `Tensor` with the same type as `x`. + ------- + Tensor + A ``Tensor`` in the same type as ``x``. + """ - with tf.name_scope(name) as scope: - x = tf.nn.sigmoid(x) * x + with tf.name_scope(name): + x = tf.nn.sigmoid(x) * x return x -def pixel_wise_softmax(output, name='pixel_wise_softmax'): + +def pixel_wise_softmax(x, name='pixel_wise_softmax'): """Return the softmax outputs of images, every pixels have multiple label, the sum of a pixel is 1. Usually be used for image segmentation. Parameters - ------------ - output : tensor - - For 2d image, 4D tensor [batch_size, height, weight, channel], channel >= 2. - - For 3d image, 5D tensor [batch_size, depth, height, weight, channel], channel >= 2. + ---------- + x : Tensor + input. + - For 2d image, 4D tensor (batch_size, height, weight, channel), where channel >= 2. + - For 3d image, 5D tensor (batch_size, depth, height, weight, channel), where channel >= 2. + name : str + function name (optional) + + Returns + ------- + Tensor + A ``Tensor`` in the same type as ``x``. 
     Examples
-    ---------
+    --------
     >>> outputs = pixel_wise_softmax(network.outputs)
     >>> dice_loss = 1 - dice_coe(outputs, y_, smooth=1e-5)

     References
-    -----------
-    - `tf.reverse `_
+    ----------
+    - `tf.reverse `__
+
     """
-    with tf.name_scope(name) as scope:
-        return tf.nn.softmax(output)
-    ## old implementation
-    # exp_map = tf.exp(output)
-    # if output.get_shape().ndims == 4:  # 2d image
-    #     evidence = tf.add(exp_map, tf.reverse(exp_map, [False, False, False, True]))
-    # elif output.get_shape().ndims == 5:  # 3d image
-    #     evidence = tf.add(exp_map, tf.reverse(exp_map, [False, False, False, False, True]))
-    # else:
-    #     raise Exception("output parameters should be 2d or 3d image, not %s" % str(output._shape))
-    # return tf.div(exp_map, evidence)
+    with tf.name_scope(name):
+        return tf.nn.softmax(x)
+
+
+# Alias
+linear = identity
+lrelu = leaky_relu
diff --git a/tensorlayer/cli/__init__.py b/tensorlayer/cli/__init__.py
new file mode 100644
index 00000000..1857582e
--- /dev/null
+++ b/tensorlayer/cli/__init__.py
@@ -0,0 +1 @@
+"""The tensorlayer.cli module provides a command-line tool for some common tasks."""
diff --git a/tensorlayer/cli/__main__.py b/tensorlayer/cli/__main__.py
new file mode 100644
index 00000000..f20479e3
--- /dev/null
+++ b/tensorlayer/cli/__main__.py
@@ -0,0 +1,13 @@
+import argparse
+from tensorlayer.cli import train
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser(prog='tl')
+    subparsers = parser.add_subparsers(dest='cmd')
+    train_parser = subparsers.add_parser('train', help='train a model using multiple local GPUs or CPUs.')
+    train.build_arg_parser(train_parser)
+    args = parser.parse_args()
+    if args.cmd == 'train':
+        train.main(args)
+    else:
+        parser.print_help()
diff --git a/tensorlayer/cli/train.py b/tensorlayer/cli/train.py
new file mode 100644
index 00000000..3df923f3
--- /dev/null
+++ b/tensorlayer/cli/train.py
@@ -0,0 +1,169 @@
+#!/usr/bin/env python
+# encoding: utf-8
+"""
+tl train
+========
+
+(Alpha release - usage might change later)
+
+The tensorlayer.cli.train module provides the ``tl train`` subcommand.
+It helps the user bootstrap a TensorFlow/TensorLayer program for distributed training
+using multiple GPU cards or CPUs on a computer.
+
+You need to first set up the `CUDA_VISIBLE_DEVICES `_
+environment variable to tell ``tl train`` which GPUs are available. If CUDA_VISIBLE_DEVICES is not given,
+``tl train`` will try its best to discover all available GPUs.
+
+In distributed training, each TensorFlow program needs a TF_CONFIG environment variable to describe
+the cluster. It also needs a master daemon to
+monitor all trainers. ``tl train`` is responsible
+for automatically managing these two tasks.
+
+Usage
+-----
+
+tl train [-h] [-p NUM_PSS] [-c CPU_TRAINERS] <file> [args [args ...]]
+
+.. code-block:: bash
+
+  # example of using GPU 0 and 1 for training mnist
+  CUDA_VISIBLE_DEVICES="0,1"
+  tl train example/tutorial_mnist_distributed.py
+
+  # example of using CPU trainers for inception v3
+  tl train -c 16 example/tutorial_imagenet_inceptionV3_distributed.py
+
+  # example of using GPU trainers for inception v3 with customized arguments
+  # as CUDA_VISIBLE_DEVICES is not given, tl would try to discover all available GPUs
+  tl train example/tutorial_imagenet_inceptionV3_distributed.py -- --batch_size 16
+
+
+Command-line Arguments
+----------------------
+
+- ``file``: Python file path.
+
+- ``NUM_PSS`` : The number of parameter servers.
+
+- ``CPU_TRAINERS``: The number of CPU trainers.
+
+  It is recommended that ``NUM_PSS + CPU_TRAINERS <= cpu count``
+
+- ``args``: Any parameter after ``--`` will be passed to the Python program.
+
+
+Notes
+-----
+A parallel training program requires multiple parameter servers
+to help parallel trainers exchange intermediate gradients.
+The best number of parameter servers is often proportional to the
+size of your model as well as the number of CPUs available.
+You can control the number of parameter servers using the ``-p`` parameter.
+
+If you have a single computer with a large number of CPUs, you can use the ``-c`` parameter
+to enable CPU-only parallel training.
+The reason we do not support GPU-CPU co-training is that GPUs and
+CPUs run at different speeds. Using them together in training would
+incur stragglers.
+
+"""
+
+import argparse
+import json
+import multiprocessing
+import os
+import platform
+import re
+import subprocess
+import sys
+
+PORT_BASE = 10000
+
+
+def _get_gpu_ids():
+    if 'CUDA_VISIBLE_DEVICES' in os.environ:
+        return [int(x) for x in os.environ.get('CUDA_VISIBLE_DEVICES', '').split(',')]
+    if platform.system() in ['Darwin', 'Linux']:
+        return [int(d.replace('nvidia', '')) for d in os.listdir('/dev') if re.match(r'^nvidia\d+$', d)]
+    else:
+        print('Please set CUDA_VISIBLE_DEVICES (see http://acceleware.com/blog/cudavisibledevices-masking-gpus)')
+        return []
+
+
+GPU_IDS = _get_gpu_ids()
+
+
+def create_tf_config(cluster_spec, task_type, task_index):
+    return {
+        'cluster': cluster_spec,
+        'task': {
+            'type': task_type,
+            'index': task_index
+        },
+    }
+
+
+def create_tf_jobs(cluster_spec, prog, args):
+    gpu_assignment = dict((('worker', idx), gpu_idx) for (idx, gpu_idx) in enumerate(GPU_IDS))
+    for job_type in cluster_spec:
+        for task_index in range(len(cluster_spec[job_type])):
+            new_env = os.environ.copy()
+            new_env.update({
+                'CUDA_VISIBLE_DEVICES': str(gpu_assignment.get((job_type, task_index), '')),
+                'TF_CONFIG': json.dumps(create_tf_config(cluster_spec, job_type, task_index)),
+            })
+            yield subprocess.Popen(['python3', prog] + args, env=new_env)
+
+
+def validate_arguments(args):
+    if args.num_pss < 1:
+        print('Value error: must have at least one parameter server.')
+        exit(1)
+
+    if not GPU_IDS:
+        num_cpus = multiprocessing.cpu_count()
+        if args.cpu_trainers > num_cpus:
+            print('Value error: there are %s available CPUs but you are requesting %s.' % (num_cpus, args.cpu_trainers))
+            exit(1)
+
+    if not os.path.isfile(args.file):
+        print('Value error: model training file does not exist')
+        exit(1)
+
+
+def main(args):
+    validate_arguments(args)
+    num_workers = len(GPU_IDS) if GPU_IDS else args.cpu_trainers
+    print('Using program %s with args %s' % (args.file, ' '.join(args.args)))
+    print('Using %d workers, %d parameter servers, %d GPUs.'
+          % (num_workers, args.num_pss, len(GPU_IDS)))
+    cluster_spec = {
+        'ps': ['localhost:%d' % (PORT_BASE + i) for i in range(args.num_pss)],
+        'worker': ['localhost:%d' % (PORT_BASE + args.num_pss + i) for i in range(num_workers)]
+    }
+    processes = list(create_tf_jobs(cluster_spec, args.file, args.args))
+    try:
+        print('Press ENTER to exit the training ...')
+        sys.stdin.readline()
+    except KeyboardInterrupt:  # https://docs.python.org/3/library/exceptions.html#KeyboardInterrupt
+        print('Keyboard interrupt received')
+    finally:
+        print('stopping all subprocesses ...')
+        for p in processes:
+            p.kill()
+        for p in processes:
+            p.wait()
+        print('END')
+
+
+def build_arg_parser(parser):
+    parser.add_argument('-p', '--pss', dest='num_pss', type=int, default=1, help='number of parameter servers')
+    parser.add_argument('-c', '--cpu_trainers', dest='cpu_trainers', type=int, default=1, help='number of CPU trainers')
+    parser.add_argument('file', help='model training file path')
+    parser.add_argument('args', nargs='*', type=str, help='arguments to <file>')
+
+
+if __name__ == "__main__":
+    parser = argparse.ArgumentParser()
+    build_arg_parser(parser)
+    args = parser.parse_args()
+    main(args)
diff --git a/tensorlayer/cost.py b/tensorlayer/cost.py
index 941b5c5b..07421bb4 100644
--- a/tensorlayer/cost.py
+++ b/tensorlayer/cost.py
@@ -1,23 +1,19 @@
-#! /usr/bin/python
 # -*- coding: utf-8 -*-

 import logging
+
 import tensorflow as tf
-import numbers

-from tensorflow.python.framework import ops
-from tensorflow.python.ops import standard_ops

-## Cost Functions

 def cross_entropy(output, target, name=None):
-    """It is a softmax cross-entropy operation, returns the TensorFlow expression of cross-entropy of two distributions, implement
+    """Softmax cross-entropy operation; returns the TensorFlow expression of cross-entropy for two distributions, implementing
     softmax internally. See ``tf.nn.sparse_softmax_cross_entropy_with_logits``.

     Parameters
     ----------
-    output : Tensorflow variable
-        A distribution with shape: [batch_size, n_feature].
-    target : Tensorflow variable
+    output : Tensor
+        A batch of distribution with shape: [batch_size, num of classes].
+    target : Tensor
         A batch of index with shape: [batch_size, ].
     name : string
         Name of this loss.
@@ -28,8 +24,9 @@
     References
     -----------
-    - About cross-entropy: `wiki `_.\n
-    - The code is borrowed from: `here `_.
+    - About cross-entropy: ``__.
+    - The code is borrowed from: ``__.
+
     """
     # try: # old
    #     return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=output, targets=target))
     # except: # TF 1.0
     assert name is not None, "Please give a unique name to tl.cost.cross_entropy for TF1.0+"
     return tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=target, logits=output, name=name))

+
 def sigmoid_cross_entropy(output, target, name=None):
-    """It is a sigmoid cross-entropy operation, see ``tf.nn.sigmoid_cross_entropy_with_logits``.
+    """Sigmoid cross-entropy operation, see ``tf.nn.sigmoid_cross_entropy_with_logits``.
+
+    Parameters
+    ----------
+    output : Tensor
+        A batch of distribution with shape: [batch_size, num of classes].
+    target : Tensor
+        A batch of index with shape: [batch_size, ].
+    name : string
+        Name of this loss.
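+
+    Examples
+    --------
+    A minimal sketch for the GAN generator loss, assuming ``logits_fake`` are raw
+    discriminator logits (the same call appears in main.py above):
+
+    >>> g_gan_loss = 1e-3 * tl.cost.sigmoid_cross_entropy(logits_fake, tf.ones_like(logits_fake), name='g')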
+ """ # try: # TF 1.0 return tf.reduce_mean(tf.nn.sigmoid_cross_entropy_with_logits(labels=target, logits=output, name=name)) @@ -47,59 +55,67 @@ def sigmoid_cross_entropy(output, target, name=None): def binary_cross_entropy(output, target, epsilon=1e-8, name='bce_loss'): - """Computes binary cross entropy given `output`. - - For brevity, let `x = output`, `z = target`. The binary cross entropy loss is - - loss(x, z) = - sum_i (x[i] * log(z[i]) + (1 - x[i]) * log(1 - z[i])) + """Binary cross entropy operation. Parameters ---------- - output : tensor of type `float32` or `float64`. - target : tensor of the same type and shape as `output`. + output : Tensor + Tensor with type of `float32` or `float64`. + target : Tensor + The target distribution, format the same with `output`. epsilon : float - A small value to avoid output is zero. - name : string - An optional name to attach to this layer. + A small value to avoid output to be zero. + name : str + An optional name to attach to this function. References ----------- - - `DRAW `_ + - `ericjang-DRAW `__ + """ -# from tensorflow.python.framework import ops -# with ops.op_scope([output, target], name, "bce_loss") as name: -# output = ops.convert_to_tensor(output, name="preds") -# target = ops.convert_to_tensor(targets, name="target") + # from tensorflow.python.framework import ops + # with ops.op_scope([output, target], name, "bce_loss") as name: + # output = ops.convert_to_tensor(output, name="preds") + # target = ops.convert_to_tensor(targets, name="target") with tf.name_scope(name): - return tf.reduce_mean(tf.reduce_sum(-(target * tf.log(output + epsilon) + - (1. - target) * tf.log(1. - output + epsilon)), axis=1)) + return tf.reduce_mean(tf.reduce_sum(-(target * tf.log(output + epsilon) + (1. - target) * tf.log(1. - output + epsilon)), axis=1)) + + # For brevity, let `x = output`, `z = target`. The binary cross entropy loss is + # + # loss(x, z) = - sum_i (x[i] * log(z[i]) + (1 - x[i]) * log(1 - z[i])) def mean_squared_error(output, target, is_mean=False, name="mean_squared_error"): - """ Return the TensorFlow expression of mean-square-error (L2) of two batch of data. + """Return the TensorFlow expression of mean-square-error (L2) of two batch of data. Parameters ---------- - output : 2D, 3D or 4D tensor i.e. [batch_size, n_feature], [batch_size, w, h] or [batch_size, w, h, c]. - target : 2D, 3D or 4D tensor. - is_mean : boolean, if True, use ``tf.reduce_mean`` to compute the loss of one data, otherwise, use ``tf.reduce_sum`` (default). + output : Tensor + 2D, 3D or 4D tensor i.e. [batch_size, n_feature], [batch_size, height, width] or [batch_size, height, width, channel]. + target : Tensor + The target distribution, format the same with `output`. + is_mean : boolean + Whether compute the mean or sum for each example. + - If True, use ``tf.reduce_mean`` to compute the loss between one target and predict data. + - If False, use ``tf.reduce_sum`` (default). 
     References
     ------------
-    - `Wiki Mean Squared Error `_
+    - `Wiki Mean Squared Error `__
+
     """
     with tf.name_scope(name):
-        if output.get_shape().ndims == 2:  # [batch_size, n_feature]
+        if output.get_shape().ndims == 2:  # [batch_size, n_feature]
             if is_mean:
                 mse = tf.reduce_mean(tf.reduce_mean(tf.squared_difference(output, target), 1))
             else:
                 mse = tf.reduce_mean(tf.reduce_sum(tf.squared_difference(output, target), 1))
-        elif output.get_shape().ndims == 3:  # [batch_size, w, h]
+        elif output.get_shape().ndims == 3:  # [batch_size, w, h]
             if is_mean:
                 mse = tf.reduce_mean(tf.reduce_mean(tf.squared_difference(output, target), [1, 2]))
             else:
                 mse = tf.reduce_mean(tf.reduce_sum(tf.squared_difference(output, target), [1, 2]))
-        elif output.get_shape().ndims == 4:  # [batch_size, w, h, c]
+        elif output.get_shape().ndims == 4:  # [batch_size, w, h, c]
             if is_mean:
                 mse = tf.reduce_mean(tf.reduce_mean(tf.squared_difference(output, target), [1, 2, 3]))
             else:
@@ -108,48 +124,59 @@ def mean_squared_error(output, target, is_mean=False, name="mean_squared_error")
             raise Exception("Unknown dimension")
         return mse

+
 def normalized_mean_square_error(output, target):
     """Return the TensorFlow expression of normalized mean-square-error of two distributions.

     Parameters
     ----------
-    output : 2D, 3D or 4D tensor i.e. [batch_size, n_feature], [batch_size, w, h] or [batch_size, w, h, c].
-    target : 2D, 3D or 4D tensor.
+    output : Tensor
+        2D, 3D or 4D tensor i.e. [batch_size, n_feature], [batch_size, height, width] or [batch_size, height, width, channel].
+    target : Tensor
+        The target distribution, format the same with `output`.
+
     """
     with tf.name_scope("mean_squared_error_loss"):
-        if output.get_shape().ndims == 2:  # [batch_size, n_feature]
+        if output.get_shape().ndims == 2:  # [batch_size, n_feature]
             nmse_a = tf.sqrt(tf.reduce_sum(tf.squared_difference(output, target), axis=1))
             nmse_b = tf.sqrt(tf.reduce_sum(tf.square(target), axis=1))
-        elif output.get_shape().ndims == 3:  # [batch_size, w, h]
-            nmse_a = tf.sqrt(tf.reduce_sum(tf.squared_difference(output, target), axis=[1,2]))
-            nmse_b = tf.sqrt(tf.reduce_sum(tf.square(target), axis=[1,2]))
-        elif output.get_shape().ndims == 4:  # [batch_size, w, h, c]
-            nmse_a = tf.sqrt(tf.reduce_sum(tf.squared_difference(output, target), axis=[1,2,3]))
-            nmse_b = tf.sqrt(tf.reduce_sum(tf.square(target), axis=[1,2,3]))
+        elif output.get_shape().ndims == 3:  # [batch_size, w, h]
+            nmse_a = tf.sqrt(tf.reduce_sum(tf.squared_difference(output, target), axis=[1, 2]))
+            nmse_b = tf.sqrt(tf.reduce_sum(tf.square(target), axis=[1, 2]))
+        elif output.get_shape().ndims == 4:  # [batch_size, w, h, c]
+            nmse_a = tf.sqrt(tf.reduce_sum(tf.squared_difference(output, target), axis=[1, 2, 3]))
+            nmse_b = tf.sqrt(tf.reduce_sum(tf.square(target), axis=[1, 2, 3]))
         nmse = tf.reduce_mean(nmse_a / nmse_b)
     return nmse

+
 def absolute_difference_error(output, target, is_mean=False):
-    """ Return the TensorFlow expression of absolute difference error (L1) of two batch of data.
+    """Return the TensorFlow expression of absolute difference error (L1) of two batches of data.

     Parameters
     ----------
-    output : 2D, 3D or 4D tensor i.e. [batch_size, n_feature], [batch_size, w, h] or [batch_size, w, h, c].
-    target : 2D, 3D or 4D tensor.
-    is_mean : boolean, if True, use ``tf.reduce_mean`` to compute the loss of one data, otherwise, use ``tf.reduce_sum`` (default).
+    output : Tensor
+        2D, 3D or 4D tensor i.e. [batch_size, n_feature], [batch_size, height, width] or [batch_size, height, width, channel].
+ target : Tensor + The target distribution, format the same with `output`. + is_mean : boolean + Whether compute the mean or sum for each example. + - If True, use ``tf.reduce_mean`` to compute the loss between one target and predict data. + - If False, use ``tf.reduce_sum`` (default). + """ with tf.name_scope("mean_squared_error_loss"): - if output.get_shape().ndims == 2: # [batch_size, n_feature] + if output.get_shape().ndims == 2: # [batch_size, n_feature] if is_mean: loss = tf.reduce_mean(tf.reduce_mean(tf.abs(output - target), 1)) else: loss = tf.reduce_mean(tf.reduce_sum(tf.abs(output - target), 1)) - elif output.get_shape().ndims == 3: # [batch_size, w, h] + elif output.get_shape().ndims == 3: # [batch_size, w, h] if is_mean: loss = tf.reduce_mean(tf.reduce_mean(tf.abs(output - target), [1, 2])) else: loss = tf.reduce_mean(tf.reduce_sum(tf.abs(output - target), [1, 2])) - elif output.get_shape().ndims == 4: # [batch_size, w, h, c] + elif output.get_shape().ndims == 4: # [batch_size, w, h, c] if is_mean: loss = tf.reduce_mean(tf.reduce_mean(tf.abs(output - target), [1, 2, 3])) else: @@ -159,27 +186,25 @@ def absolute_difference_error(output, target, is_mean=False): return loss -def dice_coe(output, target, loss_type='jaccard', axis=[1,2,3], smooth=1e-5): +def dice_coe(output, target, loss_type='jaccard', axis=[1, 2, 3], smooth=1e-5): """Soft dice (Sørensen or Jaccard) coefficient for comparing the similarity of two batch of data, usually be used for binary image segmentation i.e. labels are binary. The coefficient between 0 to 1, 1 means totally match. Parameters ----------- - output : tensor - A distribution with shape: [batch_size, ....], (any dimensions). - target : tensor + output : Tensor A distribution with shape: [batch_size, ....], (any dimensions). - loss_type : string + target : Tensor + The target distribution, format the same with `output`. + loss_type : str ``jaccard`` or ``sorensen``, default is ``jaccard``. - axis : list of integer + axis : list of int All dimensions are reduced, default ``[1,2,3]``. smooth : float This small value will be added to the numerator and denominator. - If both output and target are empty, it makes sure dice is 1. - If either output or target are empty (all pixels are background), dice = ```smooth/(small_value + smooth)``, - then if smooth is very small, dice close to 0 (even the image values lower than the threshold), - so in this case, higher smooth can have a higher dice. + - If both output and target are empty, it makes sure dice is 1. + - If either output or target are empty (all pixels are background), dice = ```smooth/(small_value + smooth)``, then if smooth is very small, dice close to 0 (even the image values lower than the threshold), so in this case, higher smooth can have a higher dice. Examples --------- @@ -188,7 +213,8 @@ def dice_coe(output, target, loss_type='jaccard', axis=[1,2,3], smooth=1e-5): References ----------- - - `Wiki-Dice `_ + - `Wiki-Dice `__ + """ inse = tf.reduce_sum(output * target, axis=axis) if loss_type == 'jaccard': @@ -210,7 +236,7 @@ def dice_coe(output, target, loss_type='jaccard', axis=[1,2,3], smooth=1e-5): return dice -def dice_hard_coe(output, target, threshold=0.5, axis=[1,2,3], smooth=1e-5): +def dice_hard_coe(output, target, threshold=0.5, axis=[1, 2, 3], smooth=1e-5): """Non-differentiable Sørensen–Dice coefficient for comparing the similarity of two batch of data, usually be used for binary image segmentation i.e. labels are binary. The coefficient between 0 to 1, 1 if totally match. 
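A minimal sketch of how these coefficients are typically combined, assuming ``outputs`` and ``y_`` are probability maps of shape ``[batch_size, height, width, 1]``: ``dice_coe`` above provides the differentiable training loss, while the non-differentiable ``dice_hard_coe`` and ``iou_coe`` below serve only as evaluation metrics:

>>> outputs = tl.act.pixel_wise_softmax(net.outputs)
>>> dice_loss = 1 - tl.cost.dice_coe(outputs, y_, axis=[1, 2, 3])
>>> hard_dice = tl.cost.dice_hard_coe(outputs, y_, threshold=0.5)
>>> iou = tl.cost.iou_coe(outputs, y_, threshold=0.5)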
 
 
-def dice_hard_coe(output, target, threshold=0.5, axis=[1,2,3], smooth=1e-5):
+def dice_hard_coe(output, target, threshold=0.5, axis=[1, 2, 3], smooth=1e-5):
     """Non-differentiable Sørensen–Dice coefficient for comparing the similarity
     of two batches of data, usually used for binary image segmentation,
     i.e. labels are binary. The coefficient ranges between 0 and 1; 1 means a total match.
 
@@ -220,7 +246,7 @@ def dice_hard_coe(output, target, threshold=0.5, axis=[1,2,3], smooth=1e-5):
     output : tensor
         A distribution with shape: [batch_size, ....], (any dimensions).
     target : tensor
-        A distribution with shape: [batch_size, ....], (any dimensions).
+        The target distribution, in the same format as `output`.
     threshold : float
         The threshold above which values are treated as true.
     axis : list of integer
@@ -230,7 +256,8 @@ def dice_hard_coe(output, target, threshold=0.5, axis=[1,2,3], smooth=1e-5):
 
     References
     -----------
-    - `Wiki-Dice <https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient>`_
+    - `Wiki-Dice <https://en.wikipedia.org/wiki/S%C3%B8rensen%E2%80%93Dice_coefficient>`__
+
     """
     output = tf.cast(output > threshold, dtype=tf.float32)
     target = tf.cast(target > threshold, dtype=tf.float32)
@@ -248,17 +275,17 @@ def dice_hard_coe(output, target, threshold=0.5, axis=[1,2,3], smooth=1e-5):
     return hard_dice
 
 
-def iou_coe(output, target, threshold=0.5, axis=[1,2,3], smooth=1e-5):
+def iou_coe(output, target, threshold=0.5, axis=[1, 2, 3], smooth=1e-5):
     """Non-differentiable Intersection over Union (IoU) for comparing the
     similarity of two batches of data, usually used for evaluating binary image segmentation.
-    The coefficient between 0 to 1, 1 means totally match.
+    The coefficient ranges between 0 and 1; 1 means a total match.
 
     Parameters
     -----------
     output : tensor
-        A distribution with shape: [batch_size, ....], (any dimensions).
+        A batch of distributions with shape: [batch_size, ....] (any dimensions).
     target : tensor
-        A distribution with shape: [batch_size, ....], (any dimensions).
+        The target distribution, in the same format as `output`.
     threshold : float
         The threshold above which values are treated as true.
     axis : list of integer
@@ -269,18 +296,20 @@ def iou_coe(output, target, threshold=0.5, axis=[1,2,3], smooth=1e-5):
 
     Notes
     ------
     - IoU cannot be used as a training loss; people usually use the dice coefficient for training, and IoU and hard-dice for evaluation.
+
     """
     pre = tf.cast(output > threshold, dtype=tf.float32)
     truth = tf.cast(target > threshold, dtype=tf.float32)
-    inse = tf.reduce_sum(tf.multiply(pre, truth), axis=axis) # AND
-    union = tf.reduce_sum(tf.cast(tf.add(pre, truth)>= 1, dtype=tf.float32), axis=axis) # OR
+    inse = tf.reduce_sum(tf.multiply(pre, truth), axis=axis)  # AND
+    union = tf.reduce_sum(tf.cast(tf.add(pre, truth) >= 1, dtype=tf.float32), axis=axis)  # OR
     ## old axis=[0,1,2,3]
     # epsilon = 1e-5
     # batch_iou = inse / (union + epsilon)
     ## new haodong
     batch_iou = (inse + smooth) / (union + smooth)
     iou = tf.reduce_mean(batch_iou)
-    return iou#, pre, truth, inse, union
+    return iou  #, pre, truth, inse, union
+
 
 # ## test soft/hard dice and iou
 # import numpy as np
 # exit()
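As the Notes above say, IoU and the hard dice are evaluation metrics rather than training losses. A sketch of the usual split, with illustrative shapes:

.. code-block:: python

    import tensorflow as tf
    import tensorlayer as tl

    t_predict = tf.placeholder(tf.float32, [None, 256, 256, 1])  # probabilities
    t_mask = tf.placeholder(tf.float32, [None, 256, 256, 1])     # binary labels

    # Differentiable soft dice for the training objective.
    loss = 1 - tl.cost.dice_coe(t_predict, t_mask)
    # Non-differentiable metrics, for evaluation only.
    hard_dice = tl.cost.dice_hard_coe(t_predict, t_mask, threshold=0.5)
    iou = tl.cost.iou_coe(t_predict, t_mask, threshold=0.5)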
@@ -316,37 +345,40 @@
-def cross_entropy_seq(logits, target_seqs, batch_size=None):#, batch_size=1, num_steps=None):
+def cross_entropy_seq(logits, target_seqs, batch_size=None):  #, batch_size=1, num_steps=None):
     """Returns the expression of cross-entropy of two sequences, implementing
-    softmax internally. Normally be used for Fixed Length RNN outputs.
+    softmax internally. Normally used for fixed-length RNN outputs; see the `PTB example <https://github.com/tensorlayer/tensorlayer/blob/master/example/tutorial_ptb_lstm_state_is_tuple.py>`__.
 
     Parameters
     ----------
-    logits : Tensorflow variable
-        2D tensor, ``network.outputs``, [batch_size*n_steps (n_examples), number of output units]
-    target_seqs : Tensorflow variable
-        target : 2D tensor [batch_size, n_steps], if the number of step is dynamic, please use ``cross_entropy_seq_with_mask`` instead.
+    logits : Tensor
+        2D tensor with shape of `[batch_size * n_steps, n_classes]`.
+    target_seqs : Tensor
+        The target sequence, 2D tensor `[batch_size, n_steps]`; if the number of steps is dynamic, please use ``tl.cost.cross_entropy_seq_with_mask`` instead.
     batch_size : None or int.
-        If not None, the return cost will be divided by batch_size.
+        Whether to divide the cost by batch size.
+        - If integer, the returned cost will be divided by `batch_size`.
+        - If None (default), the returned cost will not be divided by anything.
 
     Examples
     --------
-    >>> see PTB tutorial for more details
-    >>> input_data = tf.placeholder(tf.int32, [batch_size, num_steps])
-    >>> targets = tf.placeholder(tf.int32, [batch_size, num_steps])
+    >>> # see the `PTB example <https://github.com/tensorlayer/tensorlayer/blob/master/example/tutorial_ptb_lstm_state_is_tuple.py>`__ for more details
+    >>> input_data = tf.placeholder(tf.int32, [batch_size, n_steps])
+    >>> targets = tf.placeholder(tf.int32, [batch_size, n_steps])
+    >>> # build the network
+    >>> print(net.outputs)
+    ... (batch_size * n_steps, n_classes)
     >>> cost = tl.cost.cross_entropy_seq(network.outputs, targets)
+
     """
     # try: # TF 1.0
     sequence_loss_by_example_fn = tf.contrib.legacy_seq2seq.sequence_loss_by_example
     # except:
     #     sequence_loss_by_example_fn = tf.nn.seq2seq.sequence_loss_by_example
-    loss = sequence_loss_by_example_fn(
-        [logits],
-        [tf.reshape(target_seqs, [-1])],
-        [tf.ones_like(tf.reshape(target_seqs, [-1]), dtype=tf.float32)])
-    # [tf.ones([batch_size * num_steps])])
-    cost = tf.reduce_sum(loss) #/ batch_size
+    loss = sequence_loss_by_example_fn([logits], [tf.reshape(target_seqs, [-1])], [tf.ones_like(tf.reshape(target_seqs, [-1]), dtype=tf.float32)])
+    # [tf.ones([batch_size * num_steps])])
+    cost = tf.reduce_sum(loss)  #/ batch_size
     if batch_size is not None:
         cost = cost / batch_size
     return cost
 
@@ -354,33 +386,60 @@
 def cross_entropy_seq_with_mask(logits, target_seqs, input_mask, return_details=False, name=None):
     """Returns the expression of cross-entropy of two sequences, implementing
-    softmax internally. Normally be used for Dynamic RNN outputs.
+    softmax internally. Normally used for dynamic RNN with synced sequence input and output.
 
     Parameters
     -----------
-    logits : network identity outputs
-        2D tensor, ``network.outputs``, [batch_size, number of output units].
-    target_seqs : int of tensor, like word ID.
-        [batch_size, ?]
-    input_mask : the mask to compute loss
-        The same size with target_seqs, normally 0 and 1.
+    logits : Tensor
+        2D tensor with shape of [batch_size * ?, n_classes]; `?` means a dynamic number of steps for each example.
+        - Can be obtained from `DynamicRNNLayer` by setting ``return_seq_2d`` to `True`.
+    target_seqs : Tensor
+        Int tensor of word IDs, with shape [batch_size, ?]; `?` means a dynamic number of steps for each example.
+    input_mask : Tensor
+        The mask to compute the loss; it has the same size as `target_seqs`, normally 0 or 1.
     return_details : boolean
-        - If False (default), only returns the loss.
-        - If True, returns the loss, losses, weights and targets (reshape to one vetcor).
+        Whether to return detailed losses.
+        - If False (default), only returns the loss.
+        - If True, returns the loss, losses, weights and targets (see source code).
 
     Examples
     --------
-    - see Image Captioning Example.
+    >>> batch_size = 64
+    >>> vocab_size = 10000
+    >>> embedding_size = 256
+    >>> input_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="input")
+    >>> target_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="target")
+    >>> input_mask = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="mask")
+    >>> net = tl.layers.EmbeddingInputlayer(
+    ...         inputs = input_seqs,
+    ...         vocabulary_size = vocab_size,
+    ...         embedding_size = embedding_size,
+    ...         name = 'seq_embedding')
+    >>> net = tl.layers.DynamicRNNLayer(net,
+    ...         cell_fn = tf.contrib.rnn.BasicLSTMCell,
+    ...         n_hidden = embedding_size,
+    ...         dropout = (0.7 if is_train else None),
+    ...         sequence_length = tl.layers.retrieve_seq_length_op2(input_seqs),
+    ...         return_seq_2d = True,
+    ...         name = 'dynamicrnn')
+    >>> print(net.outputs)
+    ... (?, 256)
+    >>> net = tl.layers.DenseLayer(net, n_units=vocab_size, name="output")
+    >>> print(net.outputs)
+    ... (?, 10000)
+    >>> loss = tl.cost.cross_entropy_seq_with_mask(net.outputs, target_seqs, input_mask)
+
     """
-    targets = tf.reshape(target_seqs, [-1])   # to one vector
-    weights = tf.to_float(tf.reshape(input_mask, [-1]))   # to one vector like targets
+    targets = tf.reshape(target_seqs, [-1])  # to one vector
+    weights = tf.to_float(tf.reshape(input_mask, [-1]))  # to one vector like targets
     losses = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=targets, name=name) * weights
     #losses = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, labels=targets, name=name)) # for TF1.0 and others
 
     # try: ## TF1.0
-    loss = tf.divide(tf.reduce_sum(losses),   # loss from mask. reduce_sum before element-wise mul with mask !!
-                     tf.reduce_sum(weights),
-                     name="seq_loss_with_mask")
+    loss = tf.divide(
+        tf.reduce_sum(losses),  # loss from mask. reduce_sum before element-wise mul with mask !!
+        tf.reduce_sum(weights),
+        name="seq_loss_with_mask")
     # except: ## TF0.12
     # loss = tf.div(tf.reduce_sum(losses), # loss from mask. reduce_sum before element-wise mul with mask !!
     #     tf.reduce_sum(weights),
     #     name="seq_loss_with_mask")
@@ -392,15 +451,22 @@ def cross_entropy_seq_with_mask(logits, target_seqs, input_mask, return_details=
 
 def cosine_similarity(v1, v2):
-    """Cosine similarity [-1, 1], `wiki <https://en.wikipedia.org/wiki/Cosine_similarity>`_.
+    """Cosine similarity [-1, 1].
 
     Parameters
-    -----------
-    v1, v2 : tensor of [batch_size, n_feature], with the same number of features.
+    ----------
+    v1, v2 : Tensor
+        Tensor with the same shape [batch_size, n_feature].
 
     Returns
-    -----------
-    a tensor of [batch_size, ]
+    -------
+    Tensor
+        A tensor of shape [batch_size].
+
+    References
+    ----------
+    - `<https://en.wikipedia.org/wiki/Cosine_similarity>`__.
+
     """
     # try: ## TF1.0
     cost = tf.reduce_sum(tf.multiply(v1, v2), 1) / (tf.sqrt(tf.reduce_sum(tf.multiply(v1, v1), 1)) * tf.sqrt(tf.reduce_sum(tf.multiply(v2, v2), 1)))
@@ -411,263 +477,243 @@ def cosine_similarity(v1, v2):
 
 ## Regularization Functions
 def li_regularizer(scale, scope=None):
-    """li regularization removes the neurons of previous layer, `i` represents `inputs`.\n
-    Returns a function that can be used to apply group li regularization to weights.\n
-    The implementation follows `TensorFlow contrib `_.
-
-    Parameters
-    ----------
-    scale : float
-        A scalar multiplier `Tensor`. 0.0 disables the regularizer.
-    scope: An optional scope name for TF12+.
-
-    Returns
-    --------
-    A function with signature `li(weights, name=None)` that apply Li regularization.
-
-    Raises
-    ------
-    ValueError : if scale is outside of the range [0.0, 1.0] or if scale is not a float.
- """ - import numbers - from tensorflow.python.framework import ops - from tensorflow.python.ops import standard_ops - # from tensorflow.python.platform import tf_logging as logging - - if isinstance(scale, numbers.Integral): - raise ValueError('scale cannot be an integer: %s' % scale) - if isinstance(scale, numbers.Real): - if scale < 0.: - raise ValueError('Setting a scale less than 0 on a regularizer: %g' % - scale) - if scale >= 1.: - raise ValueError('Setting a scale greater than 1 on a regularizer: %g' % - scale) - if scale == 0.: - logging.info('Scale of 0 disables regularizer.') - return lambda _, name=None: None - - def li(weights, name=None): - """Applies li regularization to weights.""" - with tf.name_scope('li_regularizer') as scope: - my_scale = ops.convert_to_tensor(scale, - dtype=weights.dtype.base_dtype, - name='scale') - # if tf.__version__ <= '0.12': - # standard_ops_fn = standard_ops.mul - # else: - standard_ops_fn = standard_ops.multiply - return standard_ops_fn( - my_scale, - standard_ops.reduce_sum(standard_ops.sqrt(standard_ops.reduce_sum(tf.square(weights), 1))), - name=scope) - return li - - - -def lo_regularizer(scale, scope=None): - """lo regularization removes the neurons of current layer, `o` represents `outputs`\n - Returns a function that can be used to apply group lo regularization to weights.\n - The implementation follows `TensorFlow contrib `_. - - Parameters - ---------- - scale : float - A scalar multiplier `Tensor`. 0.0 disables the regularizer. - scope: An optional scope name for TF12+. - - Returns - ------- - A function with signature `lo(weights, name=None)` that apply Lo regularization. - - Raises - ------ - ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float. - """ - import numbers - from tensorflow.python.framework import ops - from tensorflow.python.ops import standard_ops - # from tensorflow.python.platform import tf_logging as logging - - if isinstance(scale, numbers.Integral): - raise ValueError('scale cannot be an integer: %s' % scale) - if isinstance(scale, numbers.Real): - if scale < 0.: - raise ValueError('Setting a scale less than 0 on a regularizer: %g' % - scale) - if scale >= 1.: - raise ValueError('Setting a scale greater than 1 on a regularizer: %g' % - scale) - if scale == 0.: - logging.info('Scale of 0 disables regularizer.') - return lambda _, name=None: None - - def lo(weights, name='lo_regularizer'): - """Applies group column regularization to weights.""" - with tf.name_scope(name) as scope: - my_scale = ops.convert_to_tensor(scale, - dtype=weights.dtype.base_dtype, - name='scale') - # if tf.__version__ <= '0.12': - # standard_ops_fn = standard_ops.mul - # else: - standard_ops_fn = standard_ops.multiply - return standard_ops_fn( - my_scale, - standard_ops.reduce_sum(standard_ops.sqrt(standard_ops.reduce_sum(tf.square(weights), 0))), - name=scope) - return lo - -def maxnorm_regularizer(scale=1.0, scope=None): - """Max-norm regularization returns a function that can be used - to apply max-norm regularization to weights. - About max-norm: `wiki `_.\n - The implementation follows `TensorFlow contrib `_. - - Parameters - ---------- - scale : float - A scalar multiplier `Tensor`. 0.0 disables the regularizer. - scope: An optional scope name. - - Returns - --------- - A function with signature `mn(weights, name=None)` that apply Lo regularization. - - Raises - -------- - ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float. 
- """ - import numbers - from tensorflow.python.framework import ops - from tensorflow.python.ops import standard_ops - - if isinstance(scale, numbers.Integral): - raise ValueError('scale cannot be an integer: %s' % scale) - if isinstance(scale, numbers.Real): - if scale < 0.: - raise ValueError('Setting a scale less than 0 on a regularizer: %g' % - scale) - # if scale >= 1.: - # raise ValueError('Setting a scale greater than 1 on a regularizer: %g' % - # scale) - if scale == 0.: - logging.info('Scale of 0 disables regularizer.') - return lambda _, name=None: None - - def mn(weights, name='max_regularizer'): - """Applies max-norm regularization to weights.""" - with tf.name_scope(name) as scope: - my_scale = ops.convert_to_tensor(scale, - dtype=weights.dtype.base_dtype, - name='scale') - # if tf.__version__ <= '0.12': - # standard_ops_fn = standard_ops.mul - # else: - standard_ops_fn = standard_ops.multiply - return standard_ops_fn(my_scale, standard_ops.reduce_max(standard_ops.abs(weights)), name=scope) - return mn - -def maxnorm_o_regularizer(scale, scope): - """Max-norm output regularization removes the neurons of current layer.\n - Returns a function that can be used to apply max-norm regularization to each column of weight matrix.\n - The implementation follows `TensorFlow contrib `_. - - Parameters - ---------- - scale : float - A scalar multiplier `Tensor`. 0.0 disables the regularizer. - scope: An optional scope name. - - Returns - --------- - A function with signature `mn_o(weights, name=None)` that apply Lo regularization. - - Raises - --------- - ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float. - """ - import numbers - from tensorflow.python.framework import ops - from tensorflow.python.ops import standard_ops - - if isinstance(scale, numbers.Integral): - raise ValueError('scale cannot be an integer: %s' % scale) - if isinstance(scale, numbers.Real): - if scale < 0.: - raise ValueError('Setting a scale less than 0 on a regularizer: %g' % - scale) - # if scale >= 1.: - # raise ValueError('Setting a scale greater than 1 on a regularizer: %g' % - # scale) - if scale == 0.: - logging.info('Scale of 0 disables regularizer.') - return lambda _, name=None: None - - def mn_o(weights, name='maxnorm_o_regularizer'): - """Applies max-norm regularization to weights.""" - with tf.name_scope(name) as scope: - my_scale = ops.convert_to_tensor(scale, - dtype=weights.dtype.base_dtype, - name='scale') - if tf.__version__ <= '0.12': - standard_ops_fn = standard_ops.mul - else: - standard_ops_fn = standard_ops.multiply - return standard_ops_fn(my_scale, standard_ops.reduce_sum(standard_ops.reduce_max(standard_ops.abs(weights), 0)), name=scope) - return mn_o - -def maxnorm_i_regularizer(scale, scope=None): - """Max-norm input regularization removes the neurons of previous layer.\n - Returns a function that can be used to apply max-norm regularization to each row of weight matrix.\n - The implementation follows `TensorFlow contrib `_. - - Parameters - ---------- - scale : float - A scalar multiplier `Tensor`. 0.0 disables the regularizer. - scope: An optional scope name. - - Returns - --------- - A function with signature `mn_i(weights, name=None)` that apply Lo regularization. - - Raises - --------- - ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float. 
- """ - import numbers - from tensorflow.python.framework import ops - from tensorflow.python.ops import standard_ops - - if isinstance(scale, numbers.Integral): - raise ValueError('scale cannot be an integer: %s' % scale) - if isinstance(scale, numbers.Real): - if scale < 0.: - raise ValueError('Setting a scale less than 0 on a regularizer: %g' % - scale) - # if scale >= 1.: - # raise ValueError('Setting a scale greater than 1 on a regularizer: %g' % - # scale) - if scale == 0.: - logging.info('Scale of 0 disables regularizer.') - return lambda _, name=None: None - - def mn_i(weights, name='maxnorm_i_regularizer'): - """Applies max-norm regularization to weights.""" - with tf.name_scope(name) as scope: - my_scale = ops.convert_to_tensor(scale, - dtype=weights.dtype.base_dtype, - name='scale') - if tf.__version__ <= '0.12': - standard_ops_fn = standard_ops.mul - else: - standard_ops_fn = standard_ops.multiply - return standard_ops_fn(my_scale, standard_ops.reduce_sum(standard_ops.reduce_max(standard_ops.abs(weights), 1)), name=scope) - return mn_i + """Li regularization removes the neurons of previous layer. The `i` represents `inputs`. + Returns a function that can be used to apply group li regularization to weights. + The implementation follows `TensorFlow contrib `__. + Parameters + ---------- + scale : float + A scalar multiplier `Tensor`. 0.0 disables the regularizer. + scope: str + An optional scope name for this function. + Returns + -------- + A function with signature `li(weights, name=None)` that apply Li regularization. + Raises + ------ + ValueError : if scale is outside of the range [0.0, 1.0] or if scale is not a float. + """ + import numbers + from tensorflow.python.framework import ops + from tensorflow.python.ops import standard_ops + # from tensorflow.python.platform import tf_logging as logging + + if isinstance(scale, numbers.Integral): + raise ValueError('scale cannot be an integer: %s' % scale) + if isinstance(scale, numbers.Real): + if scale < 0.: + raise ValueError('Setting a scale less than 0 on a regularizer: %g' % scale) + if scale >= 1.: + raise ValueError('Setting a scale greater than 1 on a regularizer: %g' % scale) + if scale == 0.: + logging.info('Scale of 0 disables regularizer.') + return lambda _, name=None: None + + def li(weights): + """Applies li regularization to weights.""" + with tf.name_scope('li_regularizer') as scope: + my_scale = ops.convert_to_tensor(scale, dtype=weights.dtype.base_dtype, name='scale') + # if tf.__version__ <= '0.12': + # standard_ops_fn = standard_ops.mul + # else: + standard_ops_fn = standard_ops.multiply + return standard_ops_fn(my_scale, standard_ops.reduce_sum(standard_ops.sqrt(standard_ops.reduce_sum(tf.square(weights), 1))), name=scope) + + return li + + +def lo_regularizer(scale): + """Lo regularization removes the neurons of current layer. The `o` represents `outputs` + Returns a function that can be used to apply group lo regularization to weights. + The implementation follows `TensorFlow contrib `__. -# + Parameters + ---------- + scale : float + A scalar multiplier `Tensor`. 0.0 disables the regularizer. + + Returns + ------- + A function with signature `lo(weights, name=None)` that apply Lo regularization. + + Raises + ------ + ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float. 
+ + """ + import numbers + from tensorflow.python.framework import ops + from tensorflow.python.ops import standard_ops + # from tensorflow.python.platform import tf_logging as logging + + if isinstance(scale, numbers.Integral): + raise ValueError('scale cannot be an integer: %s' % scale) + if isinstance(scale, numbers.Real): + if scale < 0.: + raise ValueError('Setting a scale less than 0 on a regularizer: %g' % scale) + if scale >= 1.: + raise ValueError('Setting a scale greater than 1 on a regularizer: %g' % scale) + if scale == 0.: + logging.info('Scale of 0 disables regularizer.') + return lambda _, name=None: None + + def lo(weights, name='lo_regularizer'): + """Applies group column regularization to weights.""" + with tf.name_scope(name) as scope: + my_scale = ops.convert_to_tensor(scale, dtype=weights.dtype.base_dtype, name='scale') + # if tf.__version__ <= '0.12': + # standard_ops_fn = standard_ops.mul + # else: + standard_ops_fn = standard_ops.multiply + return standard_ops_fn(my_scale, standard_ops.reduce_sum(standard_ops.sqrt(standard_ops.reduce_sum(tf.square(weights), 0))), name=scope) + + return lo + + +def maxnorm_regularizer(scale=1.0): + """Max-norm regularization returns a function that can be used to apply max-norm regularization to weights. + + More about max-norm, see `wiki-max norm `_. + The implementation follows `TensorFlow contrib `__. + + Parameters + ---------- + scale : float + A scalar multiplier `Tensor`. 0.0 disables the regularizer. + + Returns + --------- + A function with signature `mn(weights, name=None)` that apply Lo regularization. + + Raises + -------- + ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float. + + """ + import numbers + from tensorflow.python.framework import ops + from tensorflow.python.ops import standard_ops + + if isinstance(scale, numbers.Integral): + raise ValueError('scale cannot be an integer: %s' % scale) + if isinstance(scale, numbers.Real): + if scale < 0.: + raise ValueError('Setting a scale less than 0 on a regularizer: %g' % scale) + # if scale >= 1.: + # raise ValueError('Setting a scale greater than 1 on a regularizer: %g' % + # scale) + if scale == 0.: + logging.info('Scale of 0 disables regularizer.') + return lambda _, name=None: None + + def mn(weights, name='max_regularizer'): + """Applies max-norm regularization to weights.""" + with tf.name_scope(name) as scope: + my_scale = ops.convert_to_tensor(scale, dtype=weights.dtype.base_dtype, name='scale') + # if tf.__version__ <= '0.12': + # standard_ops_fn = standard_ops.mul + # else: + standard_ops_fn = standard_ops.multiply + return standard_ops_fn(my_scale, standard_ops.reduce_max(standard_ops.abs(weights)), name=scope) + + return mn + + +def maxnorm_o_regularizer(scale): + """Max-norm output regularization removes the neurons of current layer. + Returns a function that can be used to apply max-norm regularization to each column of weight matrix. + The implementation follows `TensorFlow contrib `__. + + Parameters + ---------- + scale : float + A scalar multiplier `Tensor`. 0.0 disables the regularizer. + + Returns + --------- + A function with signature `mn_o(weights, name=None)` that apply Lo regularization. + + Raises + --------- + ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float. 
+ + """ + import numbers + from tensorflow.python.framework import ops + from tensorflow.python.ops import standard_ops + + if isinstance(scale, numbers.Integral): + raise ValueError('scale cannot be an integer: %s' % scale) + if isinstance(scale, numbers.Real): + if scale < 0.: + raise ValueError('Setting a scale less than 0 on a regularizer: %g' % scale) + # if scale >= 1.: + # raise ValueError('Setting a scale greater than 1 on a regularizer: %g' % + # scale) + if scale == 0.: + logging.info('Scale of 0 disables regularizer.') + return lambda _, name=None: None + + def mn_o(weights, name='maxnorm_o_regularizer'): + """Applies max-norm regularization to weights.""" + with tf.name_scope(name) as scope: + my_scale = ops.convert_to_tensor(scale, dtype=weights.dtype.base_dtype, name='scale') + if tf.__version__ <= '0.12': + standard_ops_fn = standard_ops.mul + else: + standard_ops_fn = standard_ops.multiply + return standard_ops_fn(my_scale, standard_ops.reduce_sum(standard_ops.reduce_max(standard_ops.abs(weights), 0)), name=scope) + + return mn_o + + +def maxnorm_i_regularizer(scale): + """Max-norm input regularization removes the neurons of previous layer. + Returns a function that can be used to apply max-norm regularization to each row of weight matrix. + The implementation follows `TensorFlow contrib `__. + + Parameters + ---------- + scale : float + A scalar multiplier `Tensor`. 0.0 disables the regularizer. + + Returns + --------- + A function with signature `mn_i(weights, name=None)` that apply Lo regularization. + + Raises + --------- + ValueError : If scale is outside of the range [0.0, 1.0] or if scale is not a float. + + """ + import numbers + from tensorflow.python.framework import ops + from tensorflow.python.ops import standard_ops + + if isinstance(scale, numbers.Integral): + raise ValueError('scale cannot be an integer: %s' % scale) + if isinstance(scale, numbers.Real): + if scale < 0.: + raise ValueError('Setting a scale less than 0 on a regularizer: %g' % scale) + # if scale >= 1.: + # raise ValueError('Setting a scale greater than 1 on a regularizer: %g' % + # scale) + if scale == 0.: + logging.info('Scale of 0 disables regularizer.') + return lambda _, name=None: None + + def mn_i(weights, name='maxnorm_i_regularizer'): + """Applies max-norm regularization to weights.""" + with tf.name_scope(name) as scope: + my_scale = ops.convert_to_tensor(scale, dtype=weights.dtype.base_dtype, name='scale') + if tf.__version__ <= '0.12': + standard_ops_fn = standard_ops.mul + else: + standard_ops_fn = standard_ops.multiply + return standard_ops_fn(my_scale, standard_ops.reduce_sum(standard_ops.reduce_max(standard_ops.abs(weights), 1)), name=scope) + + return mn_i diff --git a/tensorlayer/db.py b/tensorlayer/db.py index 3858224b..15ff35b1 100644 --- a/tensorlayer/db.py +++ b/tensorlayer/db.py @@ -6,34 +6,25 @@ Latest Version """ - -import tensorflow as tf -import tensorlayer as tl -import numpy as np +import inspect +import pickle import time -import math - - import uuid +from datetime import datetime + +import numpy as np -import pymongo import gridfs -import pickle from pymongo import MongoClient -from datetime import datetime -import inspect def AutoFill(func): - def func_wrapper(self,*args,**kwargs): - d=inspect.getcallargs(func,self,*args,**kwargs) - d['args'].update({"studyID":self.studyID}) - return func(**d) - return func_wrapper - - - + def func_wrapper(self, *args, **kwargs): + d = inspect.getcallargs(func, self, *args, **kwargs) + d['args'].update({"studyID": self.studyID}) 
+ return func(**d) + return func_wrapper class TensorDB(object): @@ -41,13 +32,18 @@ class TensorDB(object): Parameters ------------- - ip : string, localhost or IP address. - port : int, port number. - db_name : string, database name. - user_name : string, set to None if it donnot need authentication. - password : string. - - Properties + ip : str + Localhost or IP address. + port : int + Port number. + db_name : str + Database name. + user_name : str + User name. Set to None if it donnot need authentication. + password : str + Password + + Attributes ------------ db : ``pymongo.MongoClient[db_name]``, xxxxxx datafs : ``gridfs.GridFS(self.db, collection="datafs")``, xxxxxxxxxx @@ -59,43 +55,30 @@ class TensorDB(object): db.TestLog : Collection for studyID : string, unique ID, if None random generate one. - Dependencies + Notes ------------- - 1 : MongoDB, as TensorDB is based on MongoDB, you need to install it in your - local machine or remote machine. - 2 : pip install pymongo, for MongoDB python API. - - Optional Tools - ---------------- - 1 : You may like to install MongoChef or Mongo Management Studo APP for - visualizing or testing your MongoDB. + - MongoDB, as TensorDB is based on MongoDB, you need to install it in your local machine or remote machine. + - pip install pymongo, for MongoDB python API. + - You may like to install MongoChef or Mongo Management Studo APP for visualizing or testing your MongoDB. """ - def __init__( - self, - ip = 'localhost', - port = 27017, - db_name = 'db_name', - user_name = None, - password = 'password', - studyID=None - ): + + def __init__(self, ip='localhost', port=27017, db_name='db_name', user_name=None, password='password', studyID=None): ## connect mongodb client = MongoClient(ip, port) self.db = client[db_name] if user_name != None: self.db.authenticate(user_name, password) - if studyID is None: - self.studyID=str(uuid.uuid1()) + self.studyID = str(uuid.uuid1()) else: - self.studyID=studyID + self.studyID = studyID ## define file system (Buckets) self.datafs = gridfs.GridFS(self.db, collection="datafs") self.modelfs = gridfs.GridFS(self.db, collection="modelfs") self.paramsfs = gridfs.GridFS(self.db, collection="paramsfs") - self.archfs=gridfs.GridFS(self.db,collection="ModelArchitecture") + self.archfs = gridfs.GridFS(self.db, collection="ModelArchitecture") ## print("[TensorDB] Connect SUCCESS {}:{} {} {} {}".format(ip, port, db_name, user_name, studyID)) @@ -104,16 +87,16 @@ def __init__( self.db_name = db_name self.user_name = user_name - def __autofill(self,args): - return args.update({'studyID':self.studyID}) + def __autofill(self, args): + return args.update({'studyID': self.studyID}) - def __serialization(self,ps): + def __serialization(self, ps): return pickle.dumps(ps, protocol=2) - def __deserialization(self,ps): + def __deserialization(self, ps): return pickle.loads(ps) - def save_params(self, params=[], args={}):#, file_name='parameters'): + def save_params(self, params=[], args={}): #, file_name='parameters'): """ Save parameters into MongoDB Buckets, and save the file ID into Params Collections. 
Parameters @@ -127,20 +110,21 @@ def save_params(self, params=[], args={}):#, file_name='parameters'): """ self.__autofill(args) s = time.time() - f_id = self.paramsfs.put(self.__serialization(params))#, file_name=file_name) + f_id = self.paramsfs.put(self.__serialization(params)) #, file_name=file_name) args.update({'f_id': f_id, 'time': datetime.utcnow()}) self.db.Params.insert_one(args) # print("[TensorDB] Save params: {} SUCCESS, took: {}s".format(file_name, round(time.time()-s, 2))) - print("[TensorDB] Save params: SUCCESS, took: {}s".format(round(time.time()-s, 2))) + print("[TensorDB] Save params: SUCCESS, took: {}s".format(round(time.time() - s, 2))) return f_id @AutoFill - def find_one_params(self, args={},sort=None): + def find_one_params(self, args={}, sort=None): """ Find one parameter from MongoDB Buckets. Parameters ---------- - args : dictionary, find items. + args : dictionary + For finding items. Returns -------- @@ -150,7 +134,7 @@ def find_one_params(self, args={},sort=None): s = time.time() # print(args) - d = self.db.Params.find_one(filter=args,sort=sort) + d = self.db.Params.find_one(filter=args, sort=sort) if d is not None: f_id = d['f_id'] @@ -159,7 +143,7 @@ def find_one_params(self, args={},sort=None): return False, False try: params = self.__deserialization(self.paramsfs.get(f_id).read()) - print("[TensorDB] Find one params SUCCESS, {} took: {}s".format(args, round(time.time()-s, 2))) + print("[TensorDB] Find one params SUCCESS, {} took: {}s".format(args, round(time.time() - s, 2))) return params, f_id except: return False, False @@ -184,14 +168,14 @@ def find_all_params(self, args={}): if pc is not None: f_id_list = pc.distinct('f_id') params = [] - for f_id in f_id_list: # you may have multiple Buckets files + for f_id in f_id_list: # you may have multiple Buckets files tmp = self.paramsfs.get(f_id).read() params.append(self.__deserialization(tmp)) else: print("[TensorDB] FAIL! 
Cannot find any: {}".format(args)) return False - print("[TensorDB] Find all params SUCCESS, took: {}s".format(round(time.time()-s, 2))) + print("[TensorDB] Find all params SUCCESS, took: {}s".format(round(time.time() - s, 2))) return params @AutoFill @@ -219,7 +203,7 @@ def _print_dict(self, args): string = '' for key, value in args.items(): if key is not '_id': - string += str(key) + ": "+ str(value) + " / " + string += str(key) + ": " + str(value) + " / " return string ## =========================== LOG =================================== ## @@ -269,7 +253,7 @@ def valid_log(self, args={}): _result = self.db.ValidLog.insert_one(args) # _log = "".join(str(key) + ": " + str(value) for key, value in args.items()) _log = self._print_dict(args) - print("[TensorDB] ValidLog: " +_log) + print("[TensorDB] ValidLog: " + _log) return _result @AutoFill @@ -299,7 +283,7 @@ def test_log(self, args={}): _result = self.db.TestLog.insert_one(args) # _log = "".join(str(key) + str(value) for key, value in args.items()) _log = self._print_dict(args) - print("[TensorDB] TestLog: " +_log) + print("[TensorDB] TestLog: " + _log) return _result @AutoFill @@ -316,14 +300,14 @@ def del_test_log(self, args={}): ## =========================== Network Architecture ================== ## @AutoFill - def save_model_architecture(self,s,args={}): + def save_model_architecture(self, s, args={}): self.__autofill(args) - fid=self.archfs.put(s,filename="modelarchitecture") - args.update({"fid":fid}) + fid = self.archfs.put(s, filename="modelarchitecture") + args.update({"fid": fid}) self.db.march.insert_one(args) @AutoFill - def load_model_architecture(self,args={}): + def load_model_architecture(self, args={}): d = self.db.march.find_one(args) if d is not None: @@ -333,7 +317,7 @@ def load_model_architecture(self,args={}): # "print find" else: print("[TensorDB] FAIL! Cannot find: {}".format(args)) - print ("no idtem") + print("no idtem") return False, False try: archs = self.archfs.get(fid).read() @@ -387,7 +371,6 @@ def find_one_job(self, args={}): dictionary : contains all meta data and script. 
""" - temp = self.db.Job.find_one(args) if temp is not None: @@ -402,151 +385,36 @@ def find_one_job(self, args={}): return temp - def push_job(self,margs, wargs,dargs,epoch): + def push_job(self, margs, wargs, dargs, epoch): - ms,mid=self.load_model_architecture(margs) - weight,wid=self.find_one_params(wargs) - args={"weight":wid,"model":mid,"dargs":dargs,"epoch":epoch,"time":datetime.utcnow(),"Running":False} + ms, mid = self.load_model_architecture(margs) + weight, wid = self.find_one_params(wargs) + args = {"weight": wid, "model": mid, "dargs": dargs, "epoch": epoch, "time": datetime.utcnow(), "Running": False} self.__autofill(args) self.db.JOBS.insert_one(args) def peek_job(self): - args={'Running':False} + args = {'Running': False} self.__autofill(args) - m=self.db.JOBS.find_one(args) + m = self.db.JOBS.find_one(args) print(m) if m is None: return False - s=self.paramsfs.get(m['weight']).read() - w=self.__deserialization(s) + s = self.paramsfs.get(m['weight']).read() + w = self.__deserialization(s) - ach=self.archfs.get(m['model']).read() + ach = self.archfs.get(m['model']).read() - return m['_id'], ach,w,m["dargs"],m['epoch'] + return m['_id'], ach, w, m["dargs"], m['epoch'] - def run_job(self,jid): - self.db.JOBS.find_one_and_update({'_id':jid},{'$set': {'Running': True,"Since":datetime.utcnow()}}) + def run_job(self, jid): + self.db.JOBS.find_one_and_update({'_id': jid}, {'$set': {'Running': True, "Since": datetime.utcnow()}}) - def del_job(self,jid): - self.db.JOBS.find_one_and_update({'_id':jid},{'$set': {'Running': True,"Finished":datetime.utcnow()}}) + def del_job(self, jid): + self.db.JOBS.find_one_and_update({'_id': jid}, {'$set': {'Running': True, "Finished": datetime.utcnow()}}) def __str__(self): _s = "[TensorDB] Info:\n" _t = _s + " " + str(self.db) return _t - - # def save_bulk_data(self, data=None, filename='filename'): - # """ Put bulk data into TensorDB.datafs, return file ID. - # When you have a very large data, you may like to save it into GridFS Buckets - # instead of Collections, then when you want to load it, XXXX - # - # Parameters - # ----------- - # data : serialized data. - # filename : string, GridFS Buckets. - # - # References - # ----------- - # - MongoDB find, xxxxx - # """ - # s = time.time() - # f_id = self.datafs.put(data, filename=filename) - # print("[TensorDB] save_bulk_data: {} took: {}s".format(filename, round(time.time()-s, 2))) - # return f_id - # - # def save_collection(self, data=None, collect_name='collect_name'): - # """ Insert data into MongoDB Collections, return xx. - # - # Parameters - # ----------- - # data : serialized data. - # collect_name : string, MongoDB collection name. - # - # References - # ----------- - # - MongoDB find, xxxxx - # """ - # s = time.time() - # rl = self.db[collect_name].insert_many(data) - # print("[TensorDB] save_collection: {} took: {}s".format(collect_name, round(time.time()-s, 2))) - # return rl - # - # def find(self, args={}, collect_name='collect_name'): - # """ Find data from MongoDB Collections. - # - # Parameters - # ----------- - # args : dictionary, arguments for finding. - # collect_name : string, MongoDB collection name. 
- # - # References - # ----------- - # - MongoDB find, xxxxx - # """ - # s = time.time() - # - # pc = self.db[collect_name].find(args) # pymongo.cursor.Cursor object - # flist = pc.distinct('f_id') - # fldict = {} - # for f in flist: # you may have multiple Buckets files - # # fldict[f] = pickle.loads(self.datafs.get(f).read()) - # # s2 = time.time() - # tmp = self.datafs.get(f).read() - # # print(time.time()-s2) - # fldict[f] = pickle.loads(tmp) - # # print(time.time()-s2) - # # exit() - # # print(round(time.time()-s, 2)) - # data = [fldict[x['f_id']][x['id']] for x in pc] - # data = np.asarray(data) - # print("[TensorDB] find: {} get: {} took: {}s".format(collect_name, pc.count(), round(time.time()-s, 2))) - # return data - - - -class DBLogger: - """ """ - def __init__(self,db,model): - self.db=db - self.model=model - - def on_train_begin(self,logs={}): - print("start") - - def on_train_end(self,logs={}): - print("end") - - def on_epoch_begin(self,epoch,logs={}): - self.epoch=epoch - self.et=time.time() - return - - def on_epoch_end(self, epoch, logs={}): - self.et=time.time()-self.et - print("ending") - print(epoch) - logs['epoch']=epoch - logs['time']=datetime.utcnow() - logs['stepTime']=self.et - logs['acc']=np.asscalar(logs['acc']) - print(logs) - - w=self.model.Params - fid=self.db.save_params(w,logs) - logs.update({'params':fid}) - self.db.valid_log(logs) - def on_batch_begin(self, batch,logs={}): - self.t=time.time() - self.losses = [] - self.batch=batch - - def on_batch_end(self, batch, logs={}): - self.t2=time.time()-self.t - logs['acc']=np.asscalar(logs['acc']) - #logs['loss']=np.asscalar(logs['loss']) - logs['step_time']=self.t2 - logs['time']=datetime.utcnow() - logs['epoch']=self.epoch - logs['batch']=self.batch - self.db.train_log(logs) diff --git a/tensorlayer/distributed.py b/tensorlayer/distributed.py index 6e9c223a..957704c5 100644 --- a/tensorlayer/distributed.py +++ b/tensorlayer/distributed.py @@ -1,33 +1,42 @@ #! /usr/bin/python # -*- coding: utf-8 -*- -import tensorflow as tf -from tensorflow.python.training import session_run_hook -import os -import sys + import json +import os import time +import tensorflow as tf +from tensorflow.python.training import session_run_hook + class TaskSpecDef(object): - """Specification for the distributed task with the job name, index of the task, + """Specification for a distributed task. + + It contains the job name, index of the task, the parameter servers and the worker servers. If you want to use the last worker - for continuous evaluation you can call the method `user_last_worker_as_evaluator` + for continuous evaluation you can call the method `use_last_worker_as_evaluator` which returns a new :class:`TaskSpecDef` object without the last worker in the cluster specification. Parameters ---------- - type : A string with the job name, it will be `master`, `worker` or `ps`. - index : The zero-based index of the task. Distributed training jobs will have a single + task_type : str + Task type. One of `master`, `worker` or `ps`. + index : int + The zero-based index of the task. Distributed training jobs will have a single master task, one or more parameter servers, and one or more workers. - trial : The identifier of the trial being run. - ps_hosts : A string with a coma separate list of hosts for the parameter servers + trial : int + The identifier of the trial being run. + ps_hosts : str OR list of str + A string with a coma separate list of hosts for the parameter servers or a list of hosts. 
- worker_hosts : A string with a coma separate list of hosts for the worker servers + worker_hosts : str OR list of str + A string with a coma separate list of hosts for the worker servers or a list of hosts. - master : A string with the master hosts + master : str + A string with the master hosts - Note + Notes ---------- master might not be included in TF_CONFIG and can be None. The shard_index is adjusted in any case to assign 0 to master and >= 1 to workers. @@ -37,12 +46,12 @@ class TaskSpecDef(object): References ---------- - - `ML-engine trainer considerations `_ + - `ML-engine trainer considerations `__ + """ - def __init__(self, type='master', index=0, trial=None, ps_hosts=None, worker_hosts=None, - master=None): - self.type = type + def __init__(self, task_type='master', index=0, trial=None, ps_hosts=None, worker_hosts=None, master=None): + self.type = task_type self._index = int(index) self._cluster_spec = None self.num_workers = 1 @@ -53,36 +62,26 @@ def __init__(self, type='master', index=0, trial=None, ps_hosts=None, worker_hos self.ps_hosts = ps_hosts self.worker_hosts = worker_hosts self.master = master + self._server = None if ps_hosts and worker_hosts: - ps = ps_hosts if isinstance(ps_hosts, list) else ps_hosts.split(',') - self.num_ps = len(ps) - worker = worker_hosts if isinstance(worker_hosts, list) else worker_hosts.split(',') + self.ps_hosts = ps_hosts if isinstance(ps_hosts, list) else ps_hosts.split(',') + self.num_ps = len(self.ps_hosts) + self.worker_hosts = worker_hosts if isinstance(worker_hosts, list) else worker_hosts.split(',') if master is not None and len(master) > 0: - self._cluster_spec = tf.train.ClusterSpec({'ps' : ps, - 'worker': worker, - 'master': master}) + self._cluster_spec = tf.train.ClusterSpec({'ps': self.ps_hosts, 'worker': self.worker_hosts, 'master': master}) # master is a worker too - self.num_workers = len(worker) + 1 + self.num_workers = len(self.worker_hosts) + 1 if self.type == 'worker': self.shard_index = self._index + 1 self._master = self.type == 'master' else: - self._cluster_spec = tf.train.ClusterSpec({'ps' : ps, - 'worker': worker}) + self._cluster_spec = tf.train.ClusterSpec({'ps': self.ps_hosts, 'worker': self.worker_hosts}) + self.num_workers = len(self.worker_hosts) if self.type == 'worker': self.shard_index = self._index self._master = self.type == 'worker' and self._index == 0 - # create server and join if it is a parameter server - self._server = tf.train.Server(self._cluster_spec, - job_name=self.type, - task_index=self._index) - if self.is_ps(): - self._server.join() - else: - self._server = None - def is_ps(self): """Returns true if this server is a parameter server""" return self.type == 'ps' @@ -97,90 +96,130 @@ def is_master(self): def is_evaluator(self): """Returns true if this server is the evaluator server""" - return self.type == 'worker' and len(self.worker_hosts) == self._index + return self.type == 'worker' and self.num_workers == self._index def device_fn(self): """Returns the function with the specification to create the graph in this server""" current_device = '/job:{}/task:{}'.format(self.type, self._index) ps_devices = '/job:ps' - return tf.train.replica_device_setter(ps_device=ps_devices, - worker_device=current_device, - cluster=self._cluster_spec) + return tf.train.replica_device_setter(ps_device=ps_devices, worker_device=current_device, cluster=self._cluster_spec) + + def create_server(self): + if self._server is None and self.ps_hosts and self.worker_hosts and not self.is_evaluator(): + # create 
server and join if it is a parameter server + self._server = tf.train.Server(self._cluster_spec, job_name=self.type, task_index=self._index) + if self.is_ps(): + self._server.join() def target(self): + if self._server is None: + self.create_server() if self._server is not None: return self._server.target else: return None - def user_last_worker_as_evaluator(self): - """ Returns a new :class:`TaskSpecDef` where the last worker has been removed from - the list of worker_hosts, so it is not used for training anymore. You can call - is_evaluator to know whether this server is the evaluator one or not. - In case there is only one server for training this method raises an exception, as - you cannot use any server for evaluation. - """ - if self.worker_hosts is None \ - or len(self.worker_hosts) == 0 \ - or (self.master is None and len(self.worker_hosts) == 1): + def use_last_worker_as_evaluator(self): + """Returns a new :class:`TaskSpecDef` where the last worker has been removed from + the list of worker_hosts, so it is not used for training anymore. You can call + is_evaluator to know whether this server is the evaluator one or not. + In case there is only one server for training this method raises an exception, as + you cannot use any server for evaluation. + + """ + if self.num_workers <= 1: raise Exception('You need more than one worker instance to use one as evaluator') - return TaskSpecDef(type=self.type, - index=self._index, - trial=self.trial, - ps_hosts=self.ps_hosts, - worker_hosts=self.worker_hosts[:-1], - master=self.master) + return TaskSpecDef( + task_type=self.type, index=self._index, trial=self.trial, ps_hosts=self.ps_hosts, worker_hosts=self.worker_hosts[:-1], master=self.master) -def TaskSpec(): - """Returns the a :class:`TaskSpecDef` based on the environment variables for distributed - training. +def create_task_spec_def(): + """Returns the a :class:`TaskSpecDef` based on the environment variables for distributed training. References ---------- - - `ML-engine trainer considerations `_ - - `TensorPort Distributed Computing `_ - """ + - `ML-engine trainer considerations `__ + - `TensorPort Distributed Computing `__ - # TF_CONFIG is used in ML-engine + """ if 'TF_CONFIG' in os.environ: + # TF_CONFIG is used in ML-engine env = json.loads(os.environ.get('TF_CONFIG', '{}')) task_data = env.get('task', None) or {'type': 'master', 'index': 0} cluster_data = env.get('cluster', None) or {'ps': None, 'worker': None, 'master': None} - return TaskSpecDef(type=task_data['type'], - index=task_data['index'], - trial=task_data['trial'] if 'trial' in task_data else None, - ps_hosts=cluster_data['ps'], - worker_hosts=cluster_data['worker'], - master=cluster_data['master'] if 'master' in cluster_data else None) - - # JOB_NAME, TASK_INDEX, PS_HOSTS, WORKER_HOSTS and MASTER_HOST are used in TensorPort - if 'JOB_NAME' in os.environ: - return TaskSpecDef(type=os.environ['JOB_NAME'], - index=os.environ['TASK_INDEX'], - ps_hosts=os.environ.get('PS_HOSTS', None), - worker_hosts=os.environ.get('WORKER_HOSTS', None), - master=os.environ.get('MASTER_HOST', None)) - return None - - -def DistributedSession(task_spec=None, - checkpoint_dir=None, - scaffold=None, - hooks=None, - chief_only_hooks=None, - save_checkpoint_secs=600, - save_summaries_steps=object(), - save_summaries_secs=object(), - config=None, - stop_grace_period_secs=120, - log_step_count_steps=100): - """Creates a distributed session. It calls `MonitoredTrainingSession` to create a - :class:`MonitoredSession` for distributed training. 
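For reference, a sketch of the ``TF_CONFIG`` payload that ``create_task_spec_def()`` parses; the host addresses are invented for illustration, and the module is assumed to be importable as ``tensorlayer.distributed``.

.. code-block:: python

    import json
    import os

    import tensorflow as tf
    from tensorlayer import distributed

    # An ML-engine style TF_CONFIG describing one ps, two workers and a master.
    os.environ['TF_CONFIG'] = json.dumps({
        'task': {'type': 'worker', 'index': 0},
        'cluster': {
            'ps': ['10.0.0.1:2222'],
            'worker': ['10.0.0.2:2222', '10.0.0.3:2222'],
            'master': ['10.0.0.4:2222'],
        },
    })

    task_spec = distributed.create_task_spec_def()
    # Pin variables to the parameter servers and ops to this worker.
    with tf.device(task_spec.device_fn()):
        pass  # build the model graph here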
+ return TaskSpecDef( + task_type=task_data['type'], + index=task_data['index'], + trial=task_data['trial'] if 'trial' in task_data else None, + ps_hosts=cluster_data['ps'], + worker_hosts=cluster_data['worker'], + master=cluster_data['master'] if 'master' in cluster_data else None) + elif 'JOB_NAME' in os.environ: + # JOB_NAME, TASK_INDEX, PS_HOSTS, WORKER_HOSTS and MASTER_HOST are used in TensorPort + return TaskSpecDef( + task_type=os.environ['JOB_NAME'], + index=os.environ['TASK_INDEX'], + ps_hosts=os.environ.get('PS_HOSTS', None), + worker_hosts=os.environ.get('WORKER_HOSTS', None), + master=os.environ.get('MASTER_HOST', None)) + else: + raise Exception('You need to setup TF_CONFIG or JOB_NAME to define the task.') + + +def create_distributed_session(task_spec=None, + checkpoint_dir=None, + scaffold=None, + hooks=None, + chief_only_hooks=None, + save_checkpoint_secs=600, + save_summaries_steps=object(), + save_summaries_secs=object(), + config=None, + stop_grace_period_secs=120, + log_step_count_steps=100): + """Creates a distributed session. + + It calls `MonitoredTrainingSession` to create a :class:`MonitoredSession` for distributed training. + + Parameters + ---------- + task_spec : :class:`TaskSpecDef`. + The task spec definition from create_task_spec_def() + checkpoint_dir : str. + Optional path to a directory where to restore variables. + scaffold : ``Scaffold`` + A `Scaffold` used for gathering or building supportive ops. + If not specified, a default one is created. It's used to finalize the graph. + hooks : list of ``SessionRunHook`` objects. + Optional + chief_only_hooks : list of ``SessionRunHook`` objects. + Activate these hooks if `is_chief==True`, ignore otherwise. + save_checkpoint_secs : int + The frequency, in seconds, that a checkpoint is saved + using a default checkpoint saver. If `save_checkpoint_secs` is set to + `None`, then the default checkpoint saver isn't used. + save_summaries_steps : int + The frequency, in number of global steps, that the + summaries are written to disk using a default summary saver. If both + `save_summaries_steps` and `save_summaries_secs` are set to `None`, then + the default summary saver isn't used. Default 100. + save_summaries_secs : int + The frequency, in secs, that the summaries are written + to disk using a default summary saver. If both `save_summaries_steps` and + `save_summaries_secs` are set to `None`, then the default summary saver + isn't used. Default not enabled. + config : ``tf.ConfigProto`` + an instance of `tf.ConfigProto` proto used to configure the session. + It's the `config` argument of constructor of `tf.Session`. + stop_grace_period_secs : int + Number of seconds given to threads to stop after + `close()` has been called. + log_step_count_steps : int + The frequency, in number of global steps, that the + global step/sec is logged. Examples -------- - A simple example for distributed training where all the workers use the same dataset: >>> task_spec = TaskSpec() @@ -214,54 +253,26 @@ def DistributedSession(task_spec=None, >>> while not session.should_stop(): >>> session.run(tensors) - - Parameters - ---------- - task_spec : TaskSpecDef. The task spec definition from TaskSpec() - checkpoint_dir : A string. Optional path to a directory where to restore - variables. - scaffold : A `Scaffold` used for gathering or building supportive ops. If - not specified, a default one is created. It's used to finalize the graph. - hooks : Optional list of `SessionRunHook` objects. 
- chief_only_hooks : list of `SessionRunHook` objects. Activate these hooks if - `is_chief==True`, ignore otherwise. - save_checkpoint_secs : The frequency, in seconds, that a checkpoint is saved - using a default checkpoint saver. If `save_checkpoint_secs` is set to - `None`, then the default checkpoint saver isn't used. - save_summaries_steps : The frequency, in number of global steps, that the - summaries are written to disk using a default summary saver. If both - `save_summaries_steps` and `save_summaries_secs` are set to `None`, then - the default summary saver isn't used. Default 100. - save_summaries_secs : The frequency, in secs, that the summaries are written - to disk using a default summary saver. If both `save_summaries_steps` and - `save_summaries_secs` are set to `None`, then the default summary saver - isn't used. Default not enabled. - config : an instance of `tf.ConfigProto` proto used to configure the session. - It's the `config` argument of constructor of `tf.Session`. - stop_grace_period_secs : Number of seconds given to threads to stop after - `close()` has been called. - log_step_count_steps : The frequency, in number of global steps, that the - global step/sec is logged. - References ---------- - - `MonitoredTrainingSession `_ + - `MonitoredTrainingSession `__ + """ target = task_spec.target() if task_spec is not None else None is_chief = task_spec.is_master() if task_spec is not None else True - return tf.train.MonitoredTrainingSession(master=target, - is_chief=is_chief, - checkpoint_dir=checkpoint_dir, - scaffold=scaffold, - save_checkpoint_secs=save_checkpoint_secs, - save_summaries_steps=save_summaries_steps, - save_summaries_secs=save_summaries_secs, - log_step_count_steps=log_step_count_steps, - stop_grace_period_secs=stop_grace_period_secs, - config=config, - hooks=hooks, - chief_only_hooks=chief_only_hooks) - + return tf.train.MonitoredTrainingSession( + master=target, + is_chief=is_chief, + checkpoint_dir=checkpoint_dir, + scaffold=scaffold, + save_checkpoint_secs=save_checkpoint_secs, + save_summaries_steps=save_summaries_steps, + save_summaries_secs=save_summaries_secs, + log_step_count_steps=log_step_count_steps, + stop_grace_period_secs=stop_grace_period_secs, + config=config, + hooks=hooks, + chief_only_hooks=chief_only_hooks) class StopAtTimeHook(session_run_hook.SessionRunHook): @@ -269,11 +280,14 @@ class StopAtTimeHook(session_run_hook.SessionRunHook): Parameters ---------- - time_running: Maximum time running in seconds + time_running: int + Maximum time running in seconds + """ def __init__(self, time_running): self._time_running = time_running + self._end_time = 0 def begin(self): self._end_time = time.time() + self._time_running @@ -295,6 +309,7 @@ class LoadCheckpoint(session_run_hook.SessionRunHook): >>> with tf.SingularMonitoredSession(hooks=[checkpoint_hook]) as session: >>> while not session.should_stop(): >>> session.run(tensors) + """ def __init__(self, saver, checkpoint): @@ -306,3 +321,8 @@ def after_create_session(self, session, coord): if not self._loaded: self._loaded = True self._saver.restore(self._checkpoint) + + +# Alias +TaskSpec = create_task_spec_def +DistributedSession = create_distributed_session diff --git a/tensorlayer/files.py b/tensorlayer/files.py index 06446a1e..1dabaa8f 100644 --- a/tensorlayer/files.py +++ b/tensorlayer/files.py @@ -1,50 +1,81 @@ -#! /usr/bin/python # -*- coding: utf-8 -*- +""" +A collections of helper functions to work with dataset. 
+ +Load benchmark dataset, save and restore model, save and load variables. +TensorFlow provides ``.ckpt`` file format to save and restore the models, while +we suggest to use standard python file format ``.npz`` to save models for the +sake of cross-platform. + +.. code-block:: python + + ## save model as .ckpt + saver = tf.train.Saver() + save_path = saver.save(sess, "model.ckpt") + # restore model from .ckpt + saver = tf.train.Saver() + saver.restore(sess, "model.ckpt") + + ## save model as .npz + tl.files.save_npz(network.all_params , name='model.npz') + # restore model from .npz (method 1) + load_params = tl.files.load_npz(name='model.npz') + tl.files.assign_params(sess, load_params, network) + # restore model from .npz (method 2) + tl.files.load_and_assign_npz(sess=sess, name='model.npz', network=network) + + ## you can assign the pre-trained parameters as follow + # 1st parameter + tl.files.assign_params(sess, [load_params[0]], network) + # the first three parameters + tl.files.assign_params(sess, load_params[:3], network) + +""" - -import tensorflow as tf +import gzip import os -import numpy as np +import pickle import re import sys import tarfile -import gzip import zipfile -from . import visualize -from . import nlp -from . import utils -import pickle -from six.moves import urllib -from six.moves import cPickle -from six.moves import zip + +import numpy as np +import tensorflow as tf +from six.moves import cPickle, zip from tensorflow.python.platform import gfile +from . import _logging as logging +from . import nlp, utils, visualize + ## Load dataset functions -def load_mnist_dataset(shape=(-1,784), path="data"): - """Automatically download MNIST dataset - and return the training, validation and test set with 50000, 10000 and 10000 - digit images respectively. +def load_mnist_dataset(shape=(-1, 784), path="data"): + """Load MNIST dataset. + + Automatically download MNIST dataset and return the training, validation and test set with 50000, 10000 and 10000 digit images respectively. Parameters ---------- shape : tuple - The shape of digit images, defaults is (-1,784) - path : string + The shape of digit images e.g. (-1,784) or (-1, 28, 28, 1). + path : str The path that the data is downloaded to, defaults is ``data/mnist/``. Examples -------- >>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1,784)) >>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1, 28, 28, 1)) + """ path = os.path.join(path, 'mnist') + # We first define functions for loading MNIST images and labels. # For convenience, they also download the requested files if needed. def load_mnist_images(path, filename): filepath = maybe_download_and_extract(filename, path, 'http://yann.lecun.com/exdb/mnist/') - print(filepath) + logging.info(filepath) # Read the inputs in Yann LeCun's binary format. with gzip.open(filepath, 'rb') as f: data = np.frombuffer(f.read(), np.uint8, offset=16) @@ -65,7 +96,7 @@ def load_mnist_labels(path, filename): return data # Download and read the training and test set images and labels. 
- print("Load or Download MNIST > {}".format(path)) + logging.info("Load or Download MNIST > {}".format(path)) X_train = load_mnist_images(path, 'train-images-idx3-ubyte.gz') y_train = load_mnist_labels(path, 'train-labels-idx1-ubyte.gz') X_test = load_mnist_images(path, 't10k-images-idx3-ubyte.gz') @@ -85,8 +116,11 @@ def load_mnist_labels(path, filename): y_test = np.asarray(y_test, dtype=np.int32) return X_train, y_train, X_val, y_val, X_test, y_test -def load_cifar10_dataset(shape=(-1, 32, 32, 3), path='data', plotable=False, second=3): - """The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, with + +def load_cifar10_dataset(shape=(-1, 32, 32, 3), path='data', plotable=False): + """Load CIFAR-10 dataset. + + It consists of 60000 32x32 colour images in 10 classes, with 6000 images per class. There are 50000 training images and 10000 test images. The dataset is divided into five training batches and one test batch, each with @@ -98,13 +132,11 @@ def load_cifar10_dataset(shape=(-1, 32, 32, 3), path='data', plotable=False, sec Parameters ---------- shape : tupe - The shape of digit images: e.g. (-1, 3, 32, 32) and (-1, 32, 32, 3). - plotable : True, False - Whether to plot some image examples. - second : int - If ``plotable`` is True, ``second`` is the display time. - path : string + The shape of digit images e.g. (-1, 3, 32, 32) and (-1, 32, 32, 3). + path : str The path that the data is downloaded to, defaults is ``data/cifar10/``. + plotable : boolean + Whether to plot some image examples, False as default. Examples -------- @@ -112,12 +144,13 @@ def load_cifar10_dataset(shape=(-1, 32, 32, 3), path='data', plotable=False, sec References ---------- - - `CIFAR website `_ - - `Data download link `_ - - `Code references `_ + - `CIFAR website `__ + - `Data download link `__ + - ``__ + """ path = os.path.join(path, 'cifar10') - print("Load or Download cifar10 > {}".format(path)) + logging.info("Load or Download cifar10 > {}".format(path)) #Helper function to unpickle the data def unpickle(file): @@ -137,7 +170,7 @@ def unpickle(file): #Unpickle file and fill in data X_train = None y_train = [] - for i in range(1,6): + for i in range(1, 6): data_dic = unpickle(os.path.join(path, 'cifar-10-batches-py/', "data_batch_{}".format(i))) if i == 1: X_train = data_dic['data'] @@ -145,7 +178,7 @@ def unpickle(file): X_train = np.vstack((X_train, data_dic['data'])) y_train += data_dic['labels'] - test_data_dic = unpickle(os.path.join(path, 'cifar-10-batches-py/', "test_batch")) + test_data_dic = unpickle(os.path.join(path, 'cifar-10-batches-py/', "test_batch")) X_test = test_data_dic['data'] y_test = np.array(test_data_dic['labels']) @@ -164,36 +197,36 @@ def unpickle(file): y_train = np.array(y_train) if plotable == True: - print('\nCIFAR-10') + logging.info('\nCIFAR-10') import matplotlib.pyplot as plt fig = plt.figure(1) - print('Shape of a training image: X_train[0]',X_train[0].shape) + logging.info('Shape of a training image: X_train[0] %s' % X_train[0].shape) - plt.ion() # interactive mode + plt.ion() # interactive mode count = 1 for row in range(10): for col in range(10): a = fig.add_subplot(10, 10, count) if shape == (-1, 3, 32, 32): # plt.imshow(X_train[count-1], interpolation='nearest') - plt.imshow(np.transpose(X_train[count-1], (1, 2, 0)), interpolation='nearest') + plt.imshow(np.transpose(X_train[count - 1], (1, 2, 0)), interpolation='nearest') # plt.imshow(np.transpose(X_train[count-1], (2, 1, 0)), interpolation='nearest') elif shape == (-1, 32, 32, 3): - 
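As a quick sanity check on what the MNIST loader above returns (shapes follow the 50000/10000/10000 split; labels come back as int32):

>>> X_train, y_train, X_val, y_val, X_test, y_test = tl.files.load_mnist_dataset(shape=(-1, 28, 28, 1))
>>> print(X_train.shape, y_train.shape)
... (50000, 28, 28, 1) (50000,)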
plt.imshow(X_train[count-1], interpolation='nearest') + plt.imshow(X_train[count - 1], interpolation='nearest') # plt.imshow(np.transpose(X_train[count-1], (1, 0, 2)), interpolation='nearest') else: raise Exception("Do not support the given 'shape' to plot the image examples") - plt.gca().xaxis.set_major_locator(plt.NullLocator()) # 不显示刻度(tick) + plt.gca().xaxis.set_major_locator(plt.NullLocator()) # 不显示刻度(tick) plt.gca().yaxis.set_major_locator(plt.NullLocator()) count = count + 1 - plt.draw() # interactive mode - plt.pause(3) # interactive mode + plt.draw() # interactive mode + plt.pause(3) # interactive mode - print("X_train:",X_train.shape) - print("y_train:",y_train.shape) - print("X_test:",X_test.shape) - print("y_test:",y_test.shape) + logging.info("X_train: %s" % X_train.shape) + logging.info("y_train: %s" % y_train.shape) + logging.info("X_test: %s" % X_test.shape) + logging.info("y_test: %s" % y_test.shape) X_train = np.asarray(X_train, dtype=np.float32) X_test = np.asarray(X_test, dtype=np.float32) @@ -202,8 +235,11 @@ def unpickle(file): return X_train, y_train, X_test, y_test + def load_ptb_dataset(path='data'): - """Penn TreeBank (PTB) dataset is used in many LANGUAGE MODELING papers, + """Load Penn TreeBank (PTB) dataset. + + It is used in many LANGUAGE MODELING papers, including "Empirical Evaluation and Combination of Advanced Language Modeling Techniques", "Recurrent Neural Network Regularization". It consists of 929k training words, 73k validation words, and 82k test @@ -211,12 +247,15 @@ def load_ptb_dataset(path='data'): Parameters ---------- - path : : string + path : str The path that the data is downloaded to, defaults is ``data/ptb/``. Returns -------- - train_data, valid_data, test_data, vocabulary size + train_data, valid_data, test_data : list of int + The training, validating and testing data in integer format. + vocab_size : int + The vocabulary size. Examples -------- @@ -225,10 +264,15 @@ def load_ptb_dataset(path='data'): References --------------- - ``tensorflow.models.rnn.ptb import reader`` - - `Manual download `_ + - `Manual download `__ + + Notes + ------ + - If you want to get the raw data, see the source code. + """ path = os.path.join(path, 'ptb') - print("Load or Download Penn TreeBank (PTB) dataset > {}".format(path)) + logging.info("Load or Download Penn TreeBank (PTB) dataset > {}".format(path)) #Maybe dowload and uncompress tar, or load exsisting files filename = 'simple-examples.tgz' @@ -245,39 +289,42 @@ def load_ptb_dataset(path='data'): train_data = nlp.words_to_word_ids(nlp.read_words(train_path), word_to_id) valid_data = nlp.words_to_word_ids(nlp.read_words(valid_path), word_to_id) test_data = nlp.words_to_word_ids(nlp.read_words(test_path), word_to_id) - vocabulary = len(word_to_id) + vocab_size = len(word_to_id) - # print(nlp.read_words(train_path)) # ... 'according', 'to', 'mr.', '', ''] - # print(train_data) # ... 214, 5, 23, 1, 2] - # print(word_to_id) # ... 'beyond': 1295, 'anti-nuclear': 9599, 'trouble': 1520, '': 2 ... } - # print(vocabulary) # 10000 + # logging.info(nlp.read_words(train_path)) # ... 'according', 'to', 'mr.', '', ''] + # logging.info(train_data) # ... 214, 5, 23, 1, 2] + # logging.info(word_to_id) # ... 'beyond': 1295, 'anti-nuclear': 9599, 'trouble': 1520, '': 2 ... 
} + # logging.info(vocabulary) # 10000 # exit() - return train_data, valid_data, test_data, vocabulary + return train_data, valid_data, test_data, vocab_size + def load_matt_mahoney_text8_dataset(path='data'): - """Download a text file from Matt Mahoney's website + """Load Matt Mahoney's dataset. + + Download a text file from Matt Mahoney's website if not present, and make sure it's the right size. Extract the first file enclosed in a zip file as a list of words. This dataset can be used for Word Embedding. Parameters ---------- - path : : string + path : str The path that the data is downloaded to, defaults is ``data/mm_test8/``. Returns -------- - word_list : a list - a list of string (word).\n - e.g. [.... 'their', 'families', 'who', 'were', 'expelled', 'from', 'jerusalem', ...] + list of str + The raw text data e.g. [.... 'their', 'families', 'who', 'were', 'expelled', 'from', 'jerusalem', ...] Examples -------- >>> words = tl.files.load_matt_mahoney_text8_dataset() >>> print('Data size', len(words)) + """ path = os.path.join(path, 'mm_test8') - print("Load or Download matt_mahoney_text8 Dataset> {}".format(path)) + logging.info("Load or Download matt_mahoney_text8 Dataset> {}".format(path)) filename = 'text8.zip' url = 'http://mattmahoney.net/dc/' @@ -289,15 +336,28 @@ def load_matt_mahoney_text8_dataset(path='data'): word_list[idx] = word_list[idx].decode() return word_list -def load_imdb_dataset(path='data', nb_words=None, skip_top=0, - maxlen=None, test_split=0.2, seed=113, - start_char=1, oov_char=2, index_from=3): - """Load IMDB dataset + +def load_imdb_dataset(path='data', nb_words=None, skip_top=0, maxlen=None, test_split=0.2, seed=113, start_char=1, oov_char=2, index_from=3): + """Load IMDB dataset. Parameters ---------- - path : : string + path : str The path that the data is downloaded to, defaults is ``data/imdb/``. + nb_words : int + Number of words to get. + skip_top : int + Top most frequent words to ignore (they will appear as oov_char value in the sequence data). + maxlen : int + Maximum sequence length. Any longer sequence will be truncated. + seed : int + Seed for reproducible data shuffling. + start_char : int + The start of a sequence will be marked with this character. Set to 1 because 0 is usually the padding character. + oov_char : int + Words that were cut out because of the num_words or skip_top limit will be replaced with this character. + index_from : int + Index actual words with this index and higher. Examples -------- @@ -310,7 +370,8 @@ def load_imdb_dataset(path='data', nb_words=None, skip_top=0, References ----------- - - `Modified from keras. `_ + - `Modified from keras. `__ + """ path = os.path.join(path, 'imdb') @@ -346,9 +407,7 @@ def load_imdb_dataset(path='data', nb_words=None, skip_top=0, X = new_X labels = new_labels if not X: - raise Exception('After filtering for sequences shorter than maxlen=' + - str(maxlen) + ', no sequence was kept. ' - 'Increase maxlen.') + raise Exception('After filtering for sequences shorter than maxlen=' + str(maxlen) + ', no sequence was kept. ' 'Increase maxlen.') if not nb_words: nb_words = max([max(x) for x in X]) @@ -374,23 +433,29 @@ def load_imdb_dataset(path='data', nb_words=None, skip_top=0, return X_train, y_train, X_test, y_test + def load_nietzsche_dataset(path='data'): """Load Nietzsche dataset. - Returns a string. Parameters ---------- - path : string + path : str The path that the data is downloaded to, defaults is ``data/nietzsche/``. + Returns + -------- + str + The content. 
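The IMDB loader returns variable-length lists of word indices, so sequences are usually padded to a fixed length before batching; a sketch using ``tl.prepro.pad_sequences`` from this release (``maxlen=100`` is an arbitrary choice):

>>> X_train, y_train, X_test, y_test = tl.files.load_imdb_dataset(nb_words=20000)
>>> X_train = tl.prepro.pad_sequences(X_train, maxlen=100, value=0)
>>> print(X_train.shape)  # (n_samples, 100)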
+ Examples -------- >>> see tutorial_generate_text.py >>> words = tl.files.load_nietzsche_dataset() >>> words = basic_clean_str(words) >>> words = words.split() + """ - print("Load or Download nietzsche dataset > {}".format(path)) + logging.info("Load or Download nietzsche dataset > {}".format(path)) path = os.path.join(path, 'nietzsche') filename = "nietzsche.txt" @@ -401,15 +466,16 @@ def load_nietzsche_dataset(path='data'): words = f.read() return words + def load_wmt_en_fr_dataset(path='data'): - """It will download English-to-French translation data from the WMT'15 - Website (10^9-French-English corpus), and the 2013 news test from - the same site as development set. + """Load WMT'15 English-to-French translation dataset. + + It will download the data from the WMT'15 Website (10^9-French-English corpus), and the 2013 news test from the same site as development set. Returns the directories of training data and test data. Parameters ---------- - path : string + path : str The path that the data is downloaded to, defaults is ``data/wmt_en_fr/``. References @@ -419,6 +485,7 @@ def load_wmt_en_fr_dataset(path='data'): Notes ----- Usually, it will take a long time to download this dataset. + """ path = os.path.join(path, 'wmt_en_fr') # URLs for WMT data. @@ -427,7 +494,7 @@ def load_wmt_en_fr_dataset(path='data'): def gunzip_file(gz_path, new_path): """Unzips from gz_path into new_path.""" - print("Unpacking %s to %s" % (gz_path, new_path)) + logging.info("Unpacking %s to %s" % (gz_path, new_path)) with gzip.open(gz_path, "rb") as gz_file: with open(new_path, "wb") as new_file: for line in gz_file: @@ -449,203 +516,226 @@ def get_wmt_enfr_dev_set(path): dev_name = "newstest2013" dev_path = os.path.join(path, "newstest2013") if not (gfile.Exists(dev_path + ".fr") and gfile.Exists(dev_path + ".en")): - print("Extracting tgz file %s" % dev_file) + logging.info("Extracting tgz file %s" % dev_file) with tarfile.open(dev_file, "r:gz") as dev_tar: - fr_dev_file = dev_tar.getmember("dev/" + dev_name + ".fr") - en_dev_file = dev_tar.getmember("dev/" + dev_name + ".en") - fr_dev_file.name = dev_name + ".fr" # Extract without "dev/" prefix. - en_dev_file.name = dev_name + ".en" - dev_tar.extract(fr_dev_file, path) - dev_tar.extract(en_dev_file, path) + fr_dev_file = dev_tar.getmember("dev/" + dev_name + ".fr") + en_dev_file = dev_tar.getmember("dev/" + dev_name + ".en") + fr_dev_file.name = dev_name + ".fr" # Extract without "dev/" prefix. + en_dev_file.name = dev_name + ".en" + dev_tar.extract(fr_dev_file, path) + dev_tar.extract(en_dev_file, path) return dev_path - print("Load or Download WMT English-to-French translation > {}".format(path)) + logging.info("Load or Download WMT English-to-French translation > {}".format(path)) train_path = get_wmt_enfr_train_set(path) dev_path = get_wmt_enfr_dev_set(path) return train_path, dev_path + def load_flickr25k_dataset(tag='sky', path="data", n_threads=50, printable=False): - """Returns a list of images by a given tag from Flick25k dataset, - it will download Flickr25k from `the official website `_ + """Load Flickr25K dataset. + + Returns a list of images by a given tag from Flick25k dataset, + it will download Flickr25k from `the official website `__ at the first time you use it. Parameters ------------ - tag : string or None - If you want to get images with tag, use string like 'dog', 'red', see `Flickr Search `_. - If you want to get all images, set to ``None``. - path : string + tag : str or None + What images to return. 
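Note that the WMT helper above returns path prefixes rather than data; the corpus files themselves are the ``.en``/``.fr`` pairs next to those prefixes, as the extraction code shows:

>>> train_path, dev_path = tl.files.load_wmt_en_fr_dataset()
>>> en_dev, fr_dev = dev_path + '.en', dev_path + '.fr'  # newstest2013 development pair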
+        - If you want to get images with tag, use string like 'dog', 'red', see `Flickr Search `__.
+        - If you want to get all images, set to ``None``.
+
+    path : str
         The path that the data is downloaded to, defaults is ``data/flickr25k/``.
-    n_threads : int, number of thread to read image.
-    printable : bool, print infomation when reading images, default is ``False``.
+    n_threads : int
+        The number of threads to read images.
+    printable : boolean
+        Whether to print information when reading images, default is ``False``.

     Examples
     -----------
-    - Get images with tag of sky
+    Get images with tag of sky
+
     >>> images = tl.files.load_flickr25k_dataset(tag='sky')

-    - Get all images
+    Get all images
+
     >>> images = tl.files.load_flickr25k_dataset(tag=None, n_threads=100, printable=True)
+
     """
     path = os.path.join(path, 'flickr25k')

     filename = 'mirflickr25k.zip'
     url = 'http://press.liacs.nl/mirflickr/mirflickr25k/'
     ## download dataset
-    if folder_exists(path+"/mirflickr") is False:
-        print("[*] Flickr25k is nonexistent in {}".format(path))
+    if folder_exists(path + "/mirflickr") is False:
+        logging.info("[*] Flickr25k is nonexistent in {}".format(path))
         maybe_download_and_extract(filename, path, url, extract=True)
-        del_file(path+'/'+filename)
+        del_file(path + '/' + filename)

     ## return images by the given tag.
     # 1. image path list
-    folder_imgs = path+"/mirflickr"
+    folder_imgs = path + "/mirflickr"
     path_imgs = load_file_list(path=folder_imgs, regx='\\.jpg', printable=False)
     path_imgs.sort(key=natural_keys)
-    # print(path_imgs[0:10])
+    # logging.info(path_imgs[0:10])
     # 2. tag path list
-    folder_tags = path+"/mirflickr/meta/tags"
+    folder_tags = path + "/mirflickr/meta/tags"
     path_tags = load_file_list(path=folder_tags, regx='\\.txt', printable=False)
     path_tags.sort(key=natural_keys)
-    # print(path_tags[0:10])
+    # logging.info(path_tags[0:10])
     # 3. select images
     if tag is None:
-        print("[Flickr25k] reading all images")
+        logging.info("[Flickr25k] reading all images")
     else:
-        print("[Flickr25k] reading images with tag: {}".format(tag))
+        logging.info("[Flickr25k] reading images with tag: {}".format(tag))
     images_list = []
     for idx in range(0, len(path_tags)):
-        tags = read_file(folder_tags+'/'+path_tags[idx]).split('\n')
-        # print(idx+1, tags)
+        tags = read_file(folder_tags + '/' + path_tags[idx]).split('\n')
+        # logging.info(idx+1, tags)
         if tag is None or tag in tags:
             images_list.append(path_imgs[idx])

     images = visualize.read_images(images_list, folder_imgs, n_threads=n_threads, printable=printable)
     return images

+
 def load_flickr1M_dataset(tag='sky', size=10, path="data", n_threads=50, printable=False):
-    """Returns a list of images by a given tag from Flickr1M dataset,
-    it will download Flickr1M from `the official website `_
+    """Load Flickr1M dataset.
+
+    Returns a list of images by a given tag from Flickr1M dataset,
+    it will download Flickr1M from `the official website `__
     at the first time you use it.

     Parameters
     ------------
-    tag : string or None
-        If you want to get images with tag, use string like 'dog', 'red', see `Flickr Search `_.
-        If you want to get all images, set to ``None``.
-    size : int 1 to 10.
-        1 means 100k images ... 5 means 500k images, 10 means all 1 million images. Default is 10.
-    path : string
+    tag : str or None
+        What images to return.
+        - If you want to get images with tag, use string like 'dog', 'red', see `Flickr Search `__.
+        - If you want to get all images, set to ``None``.
+
+    size : int
+        Integer between 1 and 10. 1 means 100k images ...
5 means 500k images, 10 means all 1 million images. Default is 10.
+    path : str
         The path that the data is downloaded to, defaults is ``data/flickr25k/``.
-    n_threads : int, number of thread to read image.
-    printable : bool, print infomation when reading images, default is ``False``.
+    n_threads : int
+        The number of threads to read images.
+    printable : boolean
+        Whether to print information when reading images, default is ``False``.

     Examples
     ----------
-    - Use 200k images
+    Use 200k images
+
     >>> images = tl.files.load_flickr1M_dataset(tag='zebra', size=2)

-    - Use 1 Million images
+    Use 1 Million images
+
     >>> images = tl.files.load_flickr1M_dataset(tag='zebra')
+
     """
     path = os.path.join(path, 'flickr1M')
-    print("[Flickr1M] using {}% of images = {}".format(size*10, size*100000))
-    images_zip = ['images0.zip', 'images1.zip', 'images2.zip', 'images3.zip',
-        'images4.zip', 'images5.zip', 'images6.zip', 'images7.zip',
-        'images8.zip', 'images9.zip']
+    logging.info("[Flickr1M] using {}% of images = {}".format(size * 10, size * 100000))
+    images_zip = [
+        'images0.zip', 'images1.zip', 'images2.zip', 'images3.zip', 'images4.zip', 'images5.zip', 'images6.zip', 'images7.zip', 'images8.zip', 'images9.zip'
+    ]
     tag_zip = 'tags.zip'
     url = 'http://press.liacs.nl/mirflickr/mirflickr1m/'
     ## download dataset
     for image_zip in images_zip[0:size]:
         image_folder = image_zip.split(".")[0]
-        # print(path+"/"+image_folder)
-        if folder_exists(path+"/"+image_folder) is False:
-            # print(image_zip)
-            print("[Flickr1M] {} is missing in {}".format(image_folder, path))
+        # logging.info(path+"/"+image_folder)
+        if folder_exists(path + "/" + image_folder) is False:
+            # logging.info(image_zip)
+            logging.info("[Flickr1M] {} is missing in {}".format(image_folder, path))
             maybe_download_and_extract(image_zip, path, url, extract=True)
-            del_file(path+'/'+image_zip)
-            os.system("mv {} {}".format(path+'/images',path+'/'+image_folder))
+            del_file(path + '/' + image_zip)
+            os.system("mv {} {}".format(path + '/images', path + '/' + image_folder))
         else:
-            print("[Flickr1M] {} exists in {}".format(image_folder, path))
+            logging.info("[Flickr1M] {} exists in {}".format(image_folder, path))

     ## download tag
-    if folder_exists(path+"/tags") is False:
-        print("[Flickr1M] tag files is nonexistent in {}".format(path))
+    if folder_exists(path + "/tags") is False:
+        logging.info("[Flickr1M] tag files are nonexistent in {}".format(path))
         maybe_download_and_extract(tag_zip, path, url, extract=True)
-        del_file(path+'/'+tag_zip)
+        del_file(path + '/' + tag_zip)
     else:
-        print("[Flickr1M] tags exists in {}".format(path))
+        logging.info("[Flickr1M] tags exist in {}".format(path))

     ## 1.
image path list images_list = [] images_folder_list = [] for i in range(0, size): - images_folder_list += load_folder_list(path=path+'/images%d'%i) - images_folder_list.sort(key=lambda s : int(s.split('/')[-1])) # folder/images/ddd - # print(images_folder_list) + images_folder_list += load_folder_list(path=path + '/images%d' % i) + images_folder_list.sort(key=lambda s: int(s.split('/')[-1])) # folder/images/ddd + # logging.info(images_folder_list) # exit() - for folder in images_folder_list[0:size*10]: + for folder in images_folder_list[0:size * 10]: tmp = load_file_list(path=folder, regx='\\.jpg', printable=False) - tmp.sort(key=lambda s : int(s.split('.')[-2])) # ddd.jpg - # print(tmp[0::570]) - images_list.extend([folder+'/'+x for x in tmp]) - # print('IM', len(images_list), images_list[0::6000]) + tmp.sort(key=lambda s: int(s.split('.')[-2])) # ddd.jpg + # logging.info(tmp[0::570]) + images_list.extend([folder + '/' + x for x in tmp]) + # logging.info('IM', len(images_list), images_list[0::6000]) ## 2. tag path list tag_list = [] - tag_folder_list = load_folder_list(path+"/tags") - tag_folder_list.sort(key=lambda s : int(s.split('/')[-1])) # folder/images/ddd + tag_folder_list = load_folder_list(path + "/tags") + tag_folder_list.sort(key=lambda s: int(s.split('/')[-1])) # folder/images/ddd - for folder in tag_folder_list[0:size*10]: - # print(folder) + for folder in tag_folder_list[0:size * 10]: + # logging.info(folder) tmp = load_file_list(path=folder, regx='\\.txt', printable=False) - tmp.sort(key=lambda s : int(s.split('.')[-2])) # ddd.txt - tmp = [folder+'/'+s for s in tmp] + tmp.sort(key=lambda s: int(s.split('.')[-2])) # ddd.txt + tmp = [folder + '/' + s for s in tmp] tag_list += tmp - # print('T', len(tag_list), tag_list[0::6000]) + # logging.info('T', len(tag_list), tag_list[0::6000]) # exit() ## 3. select images - print("[Flickr1M] searching tag: {}".format(tag)) + logging.info("[Flickr1M] searching tag: {}".format(tag)) select_images_list = [] for idx in range(0, len(tag_list)): tags = read_file(tag_list[idx]).split('\n') if tag in tags: select_images_list.append(images_list[idx]) - # print(idx, tags, tag_list[idx], images_list[idx]) - print("[Flickr1M] reading images with tag: {}".format(tag)) + # logging.info(idx, tags, tag_list[idx], images_list[idx]) + logging.info("[Flickr1M] reading images with tag: {}".format(tag)) images = visualize.read_images(select_images_list, '', n_threads=n_threads, printable=printable) return images + def load_cyclegan_dataset(filename='summer2winter_yosemite', path='data'): - """Load image data from CycleGAN's database, see `this link `_. + """Load images from CycleGAN's database, see `this link `__. Parameters ------------ - filename : string - The dataset you want, see `this link `_. - path : string + filename : str + The dataset you want, see `this link `__. 
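Since the Flickr loaders above return the decoded images themselves (a list of arrays whose shapes differ per photo), it is worth starting with a small ``size`` when experimenting; an illustrative check:

>>> images = tl.files.load_flickr1M_dataset(tag='zebra', size=1)
>>> print(len(images), images[0].shape)  # matched-image count; per-image shapes vary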
+ path : str The path that the data is downloaded to, defaults is `data/cyclegan` Examples --------- >>> im_train_A, im_train_B, im_test_A, im_test_B = load_cyclegan_dataset(filename='summer2winter_yosemite') + """ path = os.path.join(path, 'cyclegan') url = 'https://people.eecs.berkeley.edu/~taesung_park/CycleGAN/datasets/' if folder_exists(os.path.join(path, filename)) is False: - print("[*] {} is nonexistent in {}".format(filename, path)) - maybe_download_and_extract(filename+'.zip', path, url, extract=True) - del_file(os.path.join(path, filename+'.zip')) + logging.info("[*] {} is nonexistent in {}".format(filename, path)) + maybe_download_and_extract(filename + '.zip', path, url, extract=True) + del_file(os.path.join(path, filename + '.zip')) def load_image_from_folder(path): path_imgs = load_file_list(path=path, regx='\\.jpg', printable=False) return visualize.read_images(path_imgs, path=path, n_threads=10, printable=False) + im_train_A = load_image_from_folder(os.path.join(path, filename, "trainA")) im_train_B = load_image_from_folder(os.path.join(path, filename, "trainB")) im_test_A = load_image_from_folder(os.path.join(path, filename, "testA")) im_test_B = load_image_from_folder(os.path.join(path, filename, "testB")) - def if_2d_to_3d(images): # [h, w] --> [h, w, 3] + def if_2d_to_3d(images): # [h, w] --> [h, w, 3] for i in range(len(images)): if len(images[i].shape) == 2: images[i] = images[i][:, :, np.newaxis] @@ -659,97 +749,121 @@ def if_2d_to_3d(images): # [h, w] --> [h, w, 3] return im_train_A, im_train_B, im_test_A, im_test_B + def download_file_from_google_drive(id, destination): - """ Download file from Google Drive, see ``load_celebA_dataset`` for example. + """Download file from Google Drive. + + See ``tl.files.load_celebA_dataset`` for example. Parameters -------------- - id : driver ID - destination : string, save path. + id : str + The driver ID. + destination : str + The destination for save file. + """ from tqdm import tqdm import requests - def save_response_content(response, destination, chunk_size=32*1024): + + def save_response_content(response, destination, chunk_size=32 * 1024): total_size = int(response.headers.get('content-length', 0)) with open(destination, "wb") as f: - for chunk in tqdm(response.iter_content(chunk_size), total=total_size, - unit='B', unit_scale=True, desc=destination): - if chunk: # filter out keep-alive new chunks + for chunk in tqdm(response.iter_content(chunk_size), total=total_size, unit='B', unit_scale=True, desc=destination): + if chunk: # filter out keep-alive new chunks f.write(chunk) + def get_confirm_token(response): for key, value in response.cookies.items(): if key.startswith('download_warning'): return value return None + URL = "https://docs.google.com/uc?export=download" session = requests.Session() - response = session.get(URL, params={ 'id': id }, stream=True) + response = session.get(URL, params={'id': id}, stream=True) token = get_confirm_token(response) if token: - params = { 'id' : id, 'confirm' : token } + params = {'id': id, 'confirm': token} response = session.get(URL, params=params, stream=True) save_response_content(response, destination) -def load_celebA_dataset(dirpath='data'): - """ Automatically download celebA dataset, and return a list of image path. """ + +def load_celebA_dataset(path='data'): + """Load CelebA dataset + + Return a list of image path. + + Parameters + ----------- + path : str + The path that the data is downloaded to, defaults is ``data/celebA/``. 
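The Google Drive helper above can also be called on its own; the drive ID below is the CelebA archive ID used by ``load_celebA_dataset`` just after:

>>> tl.files.download_file_from_google_drive("0B7EVK8r0v71pZjFTYXZWM3FlRnM", "data/img_align_celeba.zip")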
+ + """ import zipfile, os data_dir = 'celebA' - filename, drive_id = "img_align_celeba.zip", "0B7EVK8r0v71pZjFTYXZWM3FlRnM" - save_path = os.path.join(dirpath, filename) - image_path = os.path.join(dirpath, data_dir) + filename, drive_id = "img_align_celeba.zip", "0B7EVK8r0v71pZjFTYXZWM3FlRnM" + save_path = os.path.join(path, filename) + image_path = os.path.join(path, data_dir) if os.path.exists(image_path): - print('[*] {} already exists'.format(save_path)) + logging.info('[*] {} already exists'.format(save_path)) else: - exists_or_mkdir(dirpath) + exists_or_mkdir(path) download_file_from_google_drive(drive_id, save_path) zip_dir = '' with zipfile.ZipFile(save_path) as zf: zip_dir = zf.namelist()[0] - zf.extractall(dirpath) + zf.extractall(path) os.remove(save_path) - os.rename(os.path.join(dirpath, zip_dir), image_path) + os.rename(os.path.join(path, zip_dir), image_path) data_files = load_file_list(path=image_path, regx='\\.jpg', printable=False) for i in range(len(data_files)): - data_files[i] = os.path.join(image_path, data_files[i]) + data_files[i] = os.path.join(image_path, data_files[i]) return data_files + def load_voc_dataset(path='data', dataset='2012', contain_classes_in_person=False): - """ Pascal VOC 2007/2012 Dataset has 20 objects : aeroplane, bicycle, bird, boat, bottle, bus, car, cat, chair, cow, diningtable, dog, horse, motorbike, person, pottedplant, sheep, sofa, train, tvmonitor and additional 3 classes : head, hand, foot for person. + """Pascal VOC 2007/2012 Dataset. + + It has 20 objects: + aeroplane, bicycle, bird, boat, bottle, bus, car, cat, chair, cow, diningtable, dog, horse, motorbike, person, pottedplant, sheep, sofa, train, tvmonitor + and additional 3 classes : head, hand, foot for person. Parameters ----------- - path : string + path : str The path that the data is downloaded to, defaults is ``data/VOC``. - dataset : string, 2012, 2007, 2007test or 2012test. - The VOC dataset version, we usually train model on 2007+2012 and test it on 2007test. - contain_classes_in_person : If True, dataset will contains labels of head, hand and foot. + dataset : str + The VOC dataset version, `2012`, `2007`, `2007test` or `2012test`. We usually train model on `2007+2012` and test it on `2007test`. + contain_classes_in_person : boolean + Whether include head, hand and foot annotation, default is False. Returns --------- - imgs_file_list : list of string. + imgs_file_list : list of str Full paths of all images. - imgs_semseg_file_list : list of string. + imgs_semseg_file_list : list of str Full paths of all maps for semantic segmentation. Note that not all images have this map! - imgs_insseg_file_list : list of string. + imgs_insseg_file_list : list of str Full paths of all maps for instance segmentation. Note that not all images have this map! - imgs_ann_file_list : list of string. + imgs_ann_file_list : list of str Full paths of all annotations for bounding box and object class, all images have this annotations. - classes : list of string. + classes : list of str Classes in order. - classes_in_person : list of string. + classes_in_person : list of str Classes in person. - classes_dict : dictionary. + classes_dict : dictionary Class label to integer. - n_objs_list : list of integer - Number of objects in all images in ``imgs_file_list` in order. - objs_info_list : list of string. + n_objs_list : list of int + Number of objects in all images in ``imgs_file_list`` in order. 
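``load_celebA_dataset`` above returns full file paths rather than pixel data, so reading is left to the caller; a sketch using the ``tl.vis.read_images`` helper (the batch size of 64 is arbitrary):

>>> data_files = tl.files.load_celebA_dataset()
>>> batch = tl.vis.read_images(data_files[0:64], path='', n_threads=32, printable=False)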
+ objs_info_list : list of str Darknet format for the annotation of all images in ``imgs_file_list`` in order. ``[class_id x_centre y_centre width height]`` in ratio format. - objs_info_dicts : dictionary. - ``{imgs_file_list : dictionary for annotation}``, the annotation of all images in ``imgs_file_list``, - format from `TensorFlow/Models/object-detection `_. + objs_info_dicts : dictionary + The annotation of all images in ``imgs_file_list``, ``{imgs_file_list : dictionary for annotation}``, + format from `TensorFlow/Models/object-detection `__. Examples ---------- @@ -779,56 +893,60 @@ def load_voc_dataset(path='data', dataset='2012', contain_classes_in_person=Fals References ------------- - - `Pascal VOC2012 Website `_. - - `Pascal VOC2007 Website `_. - - `TensorFlow/Models/object-detection `_. + - `Pascal VOC2012 Website `__. + - `Pascal VOC2007 Website `__. + """ - path= os.path.join(path, 'VOC') + path = os.path.join(path, 'VOC') def _recursive_parse_xml_to_dict(xml): - """Recursively parses XML contents to python dict. - We assume that `object` tags are the only ones that can appear - multiple times at the same level of a tree. - - Args: - xml: xml tree obtained by parsing XML file contents using lxml.etree - - Returns: - Python dictionary holding XML contents. - """ - if not xml: - # if xml is not None: - return {xml.tag: xml.text} - result = {} - for child in xml: - child_result = _recursive_parse_xml_to_dict(child) - if child.tag != 'object': - result[child.tag] = child_result[child.tag] - else: - if child.tag not in result: - result[child.tag] = [] - result[child.tag].append(child_result[child.tag]) - return {xml.tag: result} + """Recursively parses XML contents to python dict. + + We assume that `object` tags are the only ones that can appear + multiple times at the same level of a tree. + + Args: + xml: xml tree obtained by parsing XML file contents using lxml.etree + + Returns: + Python dictionary holding XML contents. 
+ + """ + if not xml: + # if xml is not None: + return {xml.tag: xml.text} + result = {} + for child in xml: + child_result = _recursive_parse_xml_to_dict(child) + if child.tag != 'object': + result[child.tag] = child_result[child.tag] + else: + if child.tag not in result: + result[child.tag] = [] + result[child.tag].append(child_result[child.tag]) + return {xml.tag: result} - from lxml import etree # pip install lxml + from lxml import etree # pip install lxml import xml.etree.ElementTree as ET ## if dataset == "2012": url = "http://host.robots.ox.ac.uk/pascal/VOC/voc2012/" tar_filename = "VOCtrainval_11-May-2012.tar" - extracted_filename = "VOC2012"#"VOCdevkit/VOC2012" - print(" [============= VOC 2012 =============]") + extracted_filename = "VOC2012" #"VOCdevkit/VOC2012" + logging.info(" [============= VOC 2012 =============]") elif dataset == "2012test": - extracted_filename = "VOC2012test"#"VOCdevkit/VOC2012" - print(" [============= VOC 2012 Test Set =============]") - print(" \nAuthor: 2012test only have person annotation, so 2007test is highly recommended for testing !\n") + extracted_filename = "VOC2012test" #"VOCdevkit/VOC2012" + logging.info(" [============= VOC 2012 Test Set =============]") + logging.info(" \nAuthor: 2012test only have person annotation, so 2007test is highly recommended for testing !\n") import time time.sleep(3) if os.path.isdir(os.path.join(path, extracted_filename)) is False: - print("For VOC 2012 Test data - online registration required") - print(" Please download VOC2012test.tar from: \n register: http://host.robots.ox.ac.uk:8080 \n voc2012 : http://host.robots.ox.ac.uk:8080/eval/challenges/voc2012/ \ndownload: http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2012test.tar") - print(" unzip VOC2012test.tar,rename the folder to VOC2012test and put it into %s" % path) + logging.info("For VOC 2012 Test data - online registration required") + logging.info( + " Please download VOC2012test.tar from: \n register: http://host.robots.ox.ac.uk:8080 \n voc2012 : http://host.robots.ox.ac.uk:8080/eval/challenges/voc2012/ \ndownload: http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2012test.tar" + ) + logging.info(" unzip VOC2012test.tar,rename the folder to VOC2012test and put it into %s" % path) exit() # # http://host.robots.ox.ac.uk:8080/eval/downloads/VOC2012test.tar # url = "http://host.robots.ox.ac.uk:8080/eval/downloads/" @@ -837,14 +955,14 @@ def _recursive_parse_xml_to_dict(xml): url = "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/" tar_filename = "VOCtrainval_06-Nov-2007.tar" extracted_filename = "VOC2007" - print(" [============= VOC 2007 =============]") + logging.info(" [============= VOC 2007 =============]") elif dataset == "2007test": # http://host.robots.ox.ac.uk/pascal/VOC/voc2007/index.html#testdata # http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar url = "http://host.robots.ox.ac.uk/pascal/VOC/voc2007/" tar_filename = "VOCtest_06-Nov-2007.tar" extracted_filename = "VOC2007test" - print(" [============= VOC 2007 Test Set =============]") + logging.info(" [============= VOC 2007 Test Set =============]") else: raise Exception("Please set the dataset aug to 2012, 2012test or 2007.") @@ -852,7 +970,7 @@ def _recursive_parse_xml_to_dict(xml): if dataset != "2012test": from sys import platform as _platform if folder_exists(os.path.join(path, extracted_filename)) is False: - print("[VOC] {} is nonexistent in {}".format(extracted_filename, path)) + logging.info("[VOC] {} is nonexistent in {}".format(extracted_filename, path)) 
maybe_download_and_extract(tar_filename, path, url, extract=True) del_file(os.path.join(path, tar_filename)) if dataset == "2012": @@ -872,44 +990,45 @@ def _recursive_parse_xml_to_dict(xml): os.system("mv {}/VOCdevkit/VOC2007 {}/VOC2007test".format(path, path)) del_folder(os.path.join(path, 'VOCdevkit')) ##======== object classes(labels) NOTE: YOU CAN CUSTOMIZE THIS LIST - classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", - "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", - "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"] + classes = [ + "aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", + "pottedplant", "sheep", "sofa", "train", "tvmonitor" + ] if contain_classes_in_person: classes_in_person = ["head", "hand", "foot"] else: classes_in_person = [] - classes += classes_in_person # use extra 3 classes for person + classes += classes_in_person # use extra 3 classes for person classes_dict = utils.list_string_to_dict(classes) - print("[VOC] object classes {}".format(classes_dict)) + logging.info("[VOC] object classes {}".format(classes_dict)) ##======== 1. image path list # folder_imgs = path+"/"+extracted_filename+"/JPEGImages/" folder_imgs = os.path.join(path, extracted_filename, "JPEGImages") imgs_file_list = load_file_list(path=folder_imgs, regx='\\.jpg', printable=False) - print("[VOC] {} images found".format(len(imgs_file_list))) - imgs_file_list.sort(key=lambda s : int(s.replace('.',' ').replace('_', '').split(' ')[-2])) # 2007_000027.jpg --> 2007000027 + logging.info("[VOC] {} images found".format(len(imgs_file_list))) + imgs_file_list.sort(key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2])) # 2007_000027.jpg --> 2007000027 imgs_file_list = [os.path.join(folder_imgs, s) for s in imgs_file_list] - # print('IM',imgs_file_list[0::3333], imgs_file_list[-1]) + # logging.info('IM',imgs_file_list[0::3333], imgs_file_list[-1]) if dataset != "2012test": ##======== 2. semantic segmentation maps path list # folder_semseg = path+"/"+extracted_filename+"/SegmentationClass/" folder_semseg = os.path.join(path, extracted_filename, "SegmentationClass") imgs_semseg_file_list = load_file_list(path=folder_semseg, regx='\\.png', printable=False) - print("[VOC] {} maps for semantic segmentation found".format(len(imgs_semseg_file_list))) - imgs_semseg_file_list.sort(key=lambda s : int(s.replace('.',' ').replace('_', '').split(' ')[-2])) # 2007_000032.png --> 2007000032 + logging.info("[VOC] {} maps for semantic segmentation found".format(len(imgs_semseg_file_list))) + imgs_semseg_file_list.sort(key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2])) # 2007_000032.png --> 2007000032 imgs_semseg_file_list = [os.path.join(folder_semseg, s) for s in imgs_semseg_file_list] - # print('Semantic Seg IM',imgs_semseg_file_list[0::333], imgs_semseg_file_list[-1]) + # logging.info('Semantic Seg IM',imgs_semseg_file_list[0::333], imgs_semseg_file_list[-1]) ##======== 3. 
instance segmentation maps path list # folder_insseg = path+"/"+extracted_filename+"/SegmentationObject/" folder_insseg = os.path.join(path, extracted_filename, "SegmentationObject") imgs_insseg_file_list = load_file_list(path=folder_insseg, regx='\\.png', printable=False) - print("[VOC] {} maps for instance segmentation found".format(len(imgs_semseg_file_list))) - imgs_insseg_file_list.sort(key=lambda s : int(s.replace('.',' ').replace('_', '').split(' ')[-2])) # 2007_000032.png --> 2007000032 + logging.info("[VOC] {} maps for instance segmentation found".format(len(imgs_semseg_file_list))) + imgs_insseg_file_list.sort(key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2])) # 2007_000032.png --> 2007000032 imgs_insseg_file_list = [os.path.join(folder_insseg, s) for s in imgs_insseg_file_list] - # print('Instance Seg IM',imgs_insseg_file_list[0::333], imgs_insseg_file_list[-1]) + # logging.info('Instance Seg IM',imgs_insseg_file_list[0::333], imgs_insseg_file_list[-1]) else: imgs_semseg_file_list = [] imgs_insseg_file_list = [] @@ -917,12 +1036,12 @@ def _recursive_parse_xml_to_dict(xml): # folder_ann = path+"/"+extracted_filename+"/Annotations/" folder_ann = os.path.join(path, extracted_filename, "Annotations") imgs_ann_file_list = load_file_list(path=folder_ann, regx='\\.xml', printable=False) - print("[VOC] {} XML annotation files for bounding box and object class found".format(len(imgs_ann_file_list))) - imgs_ann_file_list.sort(key=lambda s : int(s.replace('.',' ').replace('_', '').split(' ')[-2])) # 2007_000027.xml --> 2007000027 + logging.info("[VOC] {} XML annotation files for bounding box and object class found".format(len(imgs_ann_file_list))) + imgs_ann_file_list.sort(key=lambda s: int(s.replace('.', ' ').replace('_', '').split(' ')[-2])) # 2007_000027.xml --> 2007000027 imgs_ann_file_list = [os.path.join(folder_ann, s) for s in imgs_ann_file_list] - # print('ANN',imgs_ann_file_list[0::3333], imgs_ann_file_list[-1]) + # logging.info('ANN',imgs_ann_file_list[0::3333], imgs_ann_file_list[-1]) - if dataset == "2012test": # remove unused images in JPEG folder + if dataset == "2012test": # remove unused images in JPEG folder imgs_file_list_new = [] for ann in imgs_ann_file_list: ann = os.path.split(ann)[-1].split('.')[0] @@ -931,24 +1050,24 @@ def _recursive_parse_xml_to_dict(xml): imgs_file_list_new.append(im) break imgs_file_list = imgs_file_list_new - print("[VOC] keep %d images" % len(imgs_file_list_new)) + logging.info("[VOC] keep %d images" % len(imgs_file_list_new)) ##======== parse XML annotations def convert(size, box): - dw = 1./size[0] - dh = 1./size[1] - x = (box[0] + box[1])/2.0 - y = (box[2] + box[3])/2.0 + dw = 1. / size[0] + dh = 1. / size[1] + x = (box[0] + box[1]) / 2.0 + y = (box[2] + box[3]) / 2.0 w = box[1] - box[0] h = box[3] - box[2] - x = x*dw - w = w*dw - y = y*dh - h = h*dh - return (x,y,w,h) + x = x * dw + w = w * dw + y = y * dh + h = h * dh + return (x, y, w, h) def convert_annotation(file_name): - """ Given VOC2012 XML Annotations, returns number of objects and info. 
""" + """Given VOC2012 XML Annotations, returns number of objects and info.""" in_file = open(file_name) out_file = "" tree = ET.parse(in_file) @@ -958,7 +1077,7 @@ def convert_annotation(file_name): h = int(size.find('height').text) n_objs = 0 - # print(file_name, w, h, size) + # logging.info(file_name, w, h, size) # exit() for obj in root.iter('object'): if dataset != "2012test": @@ -973,7 +1092,7 @@ def convert_annotation(file_name): cls_id = classes.index(cls) xmlbox = obj.find('bndbox') b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text)) - bb = convert((w,h), b) + bb = convert((w, h), b) # out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') out_file += str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n' n_objs += 1 @@ -985,19 +1104,19 @@ def convert_annotation(file_name): cls_id = classes.index(cls) xmlbox = part.find('bndbox') b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text)) - bb = convert((w,h), b) + bb = convert((w, h), b) # out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n') out_file += str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n' n_objs += 1 in_file.close() return n_objs, out_file - print("[VOC] Parsing xml annotations files") + logging.info("[VOC] Parsing xml annotations files") n_objs_list = [] - objs_info_list = [] # Darknet Format list of string + objs_info_list = [] # Darknet Format list of string objs_info_dicts = {} for idx, ann_file in enumerate(imgs_ann_file_list): - # print(ann_file) + # logging.info(ann_file) n_objs, objs_info = convert_annotation(ann_file) n_objs_list.append(n_objs) objs_info_list.append(objs_info) @@ -1012,27 +1131,32 @@ def convert_annotation(file_name): n_objs_list, objs_info_list, objs_info_dicts - ## Load and save network list npz def save_npz(save_list=[], name='model.npz', sess=None): """Input parameters and the file name, save parameters into .npz file. Use tl.utils.load_npz() to restore. Parameters ---------- - save_list : a list - Parameters want to be saved. - name : a string or None - The name of the .npz file. + save_list : list of tensor + A list of parameters (tensor) to be saved. + name : str + The name of the `.npz` file. sess : None or Session + Session may be required in some case. 
Examples -------- - - Save model to npz + Save model to npz + >>> tl.files.save_npz(network.all_params, name='model.npz', sess=sess) - - Load model from npz (Method 1) + + Load model from npz (Method 1) + >>> load_params = tl.files.load_npz(name='model.npz') >>> tl.files.assign_params(sess, load_params, network) - - Load model from npz (Method 2) + + Load model from npz (Method 2) + >>> tl.files.load_and_assign_npz(sess=sess, name='model.npz', network=network) Notes @@ -1041,7 +1165,8 @@ def save_npz(save_list=[], name='model.npz', sess=None): References ---------- - - `Saving dictionary using numpy `_ + - `Saving dictionary using numpy `__ + """ ## save params into a list save_list_var = [] @@ -1052,89 +1177,88 @@ def save_npz(save_list=[], name='model.npz', sess=None): for k, value in enumerate(save_list): save_list_var.append(value.eval()) except: - print(" Fail to save model, Hint: pass the session into this function, save_npz(network.all_params, name='model.npz', sess=sess)") + logging.info(" Fail to save model, Hint: pass the session into this function, tl.files.save_npz(network.all_params, name='model.npz', sess=sess)") np.savez(name, params=save_list_var) save_list_var = None del save_list_var - print("[*] %s saved" % name) + logging.info("[*] %s saved" % name) ## save params into a dictionary # rename_dict = {} # for k, value in enumerate(save_dict): # rename_dict.update({'param'+str(k) : value.eval()}) # np.savez(name, **rename_dict) - # print('Model is saved to: %s' % name) + # logging.info('Model is saved to: %s' % name) + def load_npz(path='', name='model.npz'): """Load the parameters of a Model saved by tl.files.save_npz(). Parameters ---------- - path : a string - Folder path to .npz file. - name : a string or None - The name of the .npz file. + path : str + Folder path to `.npz` file. + name : str + The name of the `.npz` file. Returns -------- - params : list + list of array A list of parameters in order. Examples -------- - - See ``save_npz`` + - See ``tl.files.save_npz`` References ---------- - - `Saving dictionary using numpy `_ + - `Saving dictionary using numpy `__ + """ ## if save_npz save params into a dictionary # d = np.load( path+name ) # params = [] - # print('Load Model') + # logging.info('Load Model') # for key, val in sorted( d.items() ): # params.append(val) - # print('Loading %s, %s' % (key, str(val.shape))) + # logging.info('Loading %s, %s' % (key, str(val.shape))) # return params ## if save_npz save params into a list - d = np.load( path+name ) + d = np.load(path + name) # for val in sorted( d.items() ): # params = val # return params return d['params'] - # print(d.items()[0][1]['params']) + # logging.info(d.items()[0][1]['params']) # exit() # return d.items()[0][1]['params'] + def assign_params(sess, params, network): """Assign the given parameters to the TensorLayer network. Parameters ---------- - sess : TensorFlow Session. Automatically run when sess is not None. - params : a list - A list of parameters in order. - network : a :class:`Layer` class - The network to be assigned + sess : Session + TensorFlow Session. + params : list of array + A list of parameters (array) in order. + network : :class:`Layer` + The network to be assigned. Returns -------- - ops : list + list of operations A list of tf ops in order that assign params. Support sess.run(ops) manually. 
Examples -------- - - Save model to npz - >>> tl.files.save_npz(network.all_params, name='model.npz', sess=sess) - - Load model from npz (Method 1) - >>> load_params = tl.files.load_npz(name='model.npz') - >>> tl.files.assign_params(sess, load_params, network) - - Load model from npz (Method 2) - >>> tl.files.load_and_assign_npz(sess=sess, name='model.npz', network=network) + - See ``tl.files.save_npz`` References ---------- - - `Assign value to a TensorFlow variable `_ + - `Assign value to a TensorFlow variable `__ + """ ops = [] for idx, param in enumerate(params): @@ -1143,49 +1267,56 @@ def assign_params(sess, params, network): sess.run(ops) return ops + def load_and_assign_npz(sess=None, name=None, network=None): """Load model from npz and assign to a network. Parameters ------------- - sess : TensorFlow Session - name : string - Model path. - network : a :class:`Layer` class - The network to be assigned + sess : Session + TensorFlow Session. + name : str + The name of the `.npz` file. + network : :class:`Layer` + The network to be assigned. Returns -------- - Returns False if faild to model is not exist. + False or network + Returns False, if the model is not exist. Examples - --------- - >>> tl.files.save_npz(net.all_params, name='net.npz', sess=sess) - >>> tl.files.load_and_assign_npz(sess=sess, name='net.npz', network=net) + -------- + - See ``tl.files.save_npz`` + """ assert network is not None assert sess is not None if not os.path.exists(name): - print("[!] Load {} failed!".format(name)) + logging.info("[!] Load {} failed!".format(name)) return False else: params = load_npz(name=name) assign_params(sess, params, network) - print("[*] Load {} SUCCESS!".format(name)) + logging.info("[*] Load {} SUCCESS!".format(name)) return network + ## Load and save network dict npz def save_npz_dict(save_list=[], name='model.npz', sess=None): """Input parameters and the file name, save parameters as a dictionary into .npz file. + Use ``tl.files.load_and_assign_npz_dict()`` to restore. Parameters ---------- - save_list : a list to tensor for parameters - Parameters want to be saved. - name : a string - The name of the .npz file. + save_list : list of parameters + A list of parameters (tensor) to be saved. + name : str + The name of the `.npz` file. sess : Session + TensorFlow Session. + """ assert sess is not None save_list_names = [tensor.name for tensor in save_list] @@ -1196,20 +1327,23 @@ def save_npz_dict(save_list=[], name='model.npz', sess=None): save_var_dict = None del save_list_var del save_var_dict - print("[*] Model saved in npz_dict %s" % name) + logging.info("[*] Model saved in npz_dict %s" % name) + def load_and_assign_npz_dict(name='model.npz', sess=None): """Restore the parameters saved by ``tl.files.save_npz_dict()``. Parameters ---------- - name : a string - The name of the .npz file. + name : str + The name of the `.npz` file. sess : Session + TensorFlow Session. + """ assert sess is not None if not os.path.exists(name): - print("[!] Load {} failed!".format(name)) + logging.info("[!] Load {} failed!".format(name)) return False params = np.load(name) @@ -1227,12 +1361,13 @@ def load_and_assign_npz_dict(name='model.npz', sess=None): raise KeyError else: ops.append(varlist[0].assign(params[key])) - print("[*] params restored: %s" % key) + logging.info("[*] params restored: %s" % key) except KeyError: - print("[!] Warning: Tensor named %s not found in network." % key) + logging.info("[!] Warning: Tensor named %s not found in network." 
% key) sess.run(ops) - print("[*] Model restored from npz_dict %s" % name) + logging.info("[*] Model restored from npz_dict %s" % name) + # def save_npz_dict(save_list=[], name='model.npz', sess=None): # """Input parameters and the file name, save parameters as a dictionary into .npz file. Use tl.utils.load_npz_dict() to restore. @@ -1259,14 +1394,14 @@ def load_and_assign_npz_dict(name='model.npz', sess=None): # for k, value in enumerate(save_list): # save_list_var.append(value.eval()) # except: -# print(" Fail to save model, Hint: pass the session into this function, save_npz_dict(network.all_params, name='model.npz', sess=sess)") +# logging.info(" Fail to save model, Hint: pass the session into this function, save_npz_dict(network.all_params, name='model.npz', sess=sess)") # save_var_dict = {str(idx):val for idx, val in enumerate(save_list_var)} # np.savez(name, **save_var_dict) # save_list_var = None # save_var_dict = None # del save_list_var # del save_var_dict -# print("[*] %s saved" % name) +# logging.info("[*] %s saved" % name) # # def load_npz_dict(path='', name='model.npz'): # """Load the parameters of a Model saved by tl.files.save_npz_dict(). @@ -1288,61 +1423,81 @@ def load_and_assign_npz_dict(name='model.npz', sess=None): # return saved_list_var - ## Load and save network ckpt def save_ckpt(sess=None, mode_name='model.ckpt', save_dir='checkpoint', var_list=[], global_step=None, printable=False): - """Save parameters into ckpt file. + """Save parameters into `ckpt` file. Parameters ------------ - sess : Session. - mode_name : string, name of the model, default is ``model.ckpt``. - save_dir : string, path / file directory to the ckpt, default is ``checkpoint``. - var_list : list of variables, if not given, save all global variables. - global_step : int or None, step number. - printable : bool, if True, print all params info. + sess : Session + TensorFlow Session. + mode_name : str + The name of the model, default is ``model.ckpt``. + save_dir : str + The path / file directory to the `ckpt`, default is ``checkpoint``. + var_list : list of tensor + The parameters / variables (tensor) to be saved. If empty, save all global variables (default). + global_step : int or None + Step number. + printable : boolean + Whether to print all parameters information. + + See Also + -------- + load_ckpt - Examples - --------- - - see ``tl.files.load_ckpt()``. """ assert sess is not None ckpt_file = os.path.join(save_dir, mode_name) if var_list == []: var_list = tf.global_variables() - print("[*] save %s n_params: %d" % (ckpt_file, len(var_list))) + logging.info("[*] save %s n_params: %d" % (ckpt_file, len(var_list))) if printable: for idx, v in enumerate(var_list): - print(" param {:3}: {:15} {}".format(idx, v.name, str(v.get_shape()))) + logging.info(" param {:3}: {:15} {}".format(idx, v.name, str(v.get_shape()))) saver = tf.train.Saver(var_list) saver.save(sess, ckpt_file, global_step=global_step) + def load_ckpt(sess=None, mode_name='model.ckpt', save_dir='checkpoint', var_list=[], is_latest=True, printable=False): - """Load parameters from ckpt file. + """Load parameters from `ckpt` file. Parameters ------------ - sess : Session. - mode_name : string, name of the model, default is ``model.ckpt``. - Note that if ``is_latest`` is True, this function will get the ``mode_name`` automatically. - save_dir : string, path / file directory to the ckpt, default is ``checkpoint``. - var_list : list of variables, if not given, save all global variables. 
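The dict-based pair above restores by tensor name (warning on names it cannot find, as the code shows) but has no usage example yet; a minimal round trip, assuming an existing ``network`` and ``sess``:

>>> tl.files.save_npz_dict(network.all_params, name='model_dict.npz', sess=sess)
>>> tl.files.load_and_assign_npz_dict(name='model_dict.npz', sess=sess)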
- is_latest : bool, if True, load the latest ckpt, if False, load the ckpt with the name of ```mode_name``. - printable : bool, if True, print all params info. + sess : Session + TensorFlow Session. + mode_name : str + The name of the model, default is ``model.ckpt``. + save_dir : str + The path / file directory to the `ckpt`, default is ``checkpoint``. + var_list : list of tensor + The parameters / variables (tensor) to be saved. If empty, save all global variables (default). + is_latest : boolean + Whether to load the latest `ckpt`, if False, load the `ckpt` with the name of ```mode_name``. + printable : boolean + Whether to print all parameters information. Examples ---------- - - Save all global parameters. + Save all global parameters. + >>> tl.files.save_ckpt(sess=sess, mode_name='model.ckpt', save_dir='model', printable=True) - - Save specific parameters. + + Save specific parameters. + >>> tl.files.save_ckpt(sess=sess, mode_name='model.ckpt', var_list=net.all_params, save_dir='model', printable=True) - - Load latest ckpt. + + Load latest ckpt. + >>> tl.files.load_ckpt(sess=sess, var_list=net.all_params, save_dir='model', printable=True) - - Load specific ckpt. + + Load specific ckpt. + >>> tl.files.load_ckpt(sess=sess, mode_name='model.ckpt', var_list=net.all_params, save_dir='model', is_latest=False, printable=True) + """ assert sess is not None @@ -1354,24 +1509,23 @@ def load_ckpt(sess=None, mode_name='model.ckpt', save_dir='checkpoint', var_list if var_list == []: var_list = tf.global_variables() - print("[*] load %s n_params: %d" % (ckpt_file, len(var_list))) + logging.info("[*] load %s n_params: %d" % (ckpt_file, len(var_list))) if printable: for idx, v in enumerate(var_list): - print(" param {:3}: {:15} {}".format(idx, v.name, str(v.get_shape()))) + logging.info(" param {:3}: {:15} {}".format(idx, v.name, str(v.get_shape()))) try: saver = tf.train.Saver(var_list) saver.restore(sess, ckpt_file) except Exception as e: - print(e) - print("[*] load ckpt fail ...") - + logging.info(e) + logging.info("[*] load ckpt fail ...") ## Load and save variables def save_any_to_npy(save_dict={}, name='file.npy'): - """Save variables to .npy file. + """Save variables to `.npy` file. Examples --------- @@ -1379,15 +1533,18 @@ def save_any_to_npy(save_dict={}, name='file.npy'): >>> data = tl.files.load_npy_to_any(name='test.npy') >>> print(data) ... {'data': ['a','b']} + """ np.save(name, save_dict) + def load_npy_to_any(path='', name='file.npy'): - """Load .npy file. + """Load `.npy` file. Examples --------- - - see save_any_to_npy() + - see tl.files.save_any_to_npy() + """ file_path = os.path.join(path, name) try: @@ -1398,55 +1555,61 @@ def load_npy_to_any(path='', name='file.npy'): try: return npy except: - print("[!] Fail to load %s" % file_path) + logging.info("[!] Fail to load %s" % file_path) exit() - - ## Folder functions def file_exists(filepath): - """ Check whether a file exists by given file path. """ + """Check whether a file exists by given file path.""" return os.path.isfile(filepath) + def folder_exists(folderpath): - """ Check whether a folder exists by given folder path. """ + """Check whether a folder exists by given folder path.""" return os.path.isdir(folderpath) + def del_file(filepath): - """ Delete a file by given file path. """ + """Delete a file by given file path.""" os.remove(filepath) + def del_folder(folderpath): - """ Delete a folder by given folder path. 
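One subtlety of the ckpt helpers above worth an example: with ``global_step`` set, TensorFlow's saver writes step-suffixed files (e.g. ``model.ckpt-100``), and ``is_latest=True`` is then the convenient way to read them back; a sketch assuming an existing ``sess``:

>>> tl.files.save_ckpt(sess=sess, mode_name='model.ckpt', save_dir='checkpoint', global_step=100)
>>> tl.files.load_ckpt(sess=sess, save_dir='checkpoint', is_latest=True)  # picks up model.ckpt-100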
""" + """Delete a folder by given folder path.""" os.rmdir(folderpath) + def read_file(filepath): - """ Read a file and return a string. + """Read a file and return a string. Examples --------- >>> data = tl.files.read_file('data.txt') + """ with open(filepath, 'r') as afile: return afile.read() + def load_file_list(path=None, regx='\.npz', printable=True): - """Return a file list in a folder by given a path and regular expression. + r"""Return a file list in a folder by given a path and regular expression. Parameters ---------- - path : a string or None - A folder path. - regx : a string + path : str or None + A folder path, if `None`, use the current directory. + regx : str The regx of file name. - printable : boolean, whether to print the files infomation. + printable : boolean + Whether to print the files infomation. Examples ---------- >>> file_list = tl.files.load_file_list(path=None, regx='w1pre_[0-9]+\.(npz)') + """ - if path == False: + if path is None: path = os.getcwd() file_list = os.listdir(path) return_list = [] @@ -1455,19 +1618,22 @@ def load_file_list(path=None, regx='\.npz', printable=True): return_list.append(f) # return_list.sort() if printable: - print('Match file list = %s' % return_list) - print('Number of files = %d' % len(return_list)) + logging.info('Match file list = %s' % return_list) + logging.info('Number of files = %d' % len(return_list)) return return_list + def load_folder_list(path=""): """Return a folder list in a folder by given a folder path. Parameters ---------- - path : a string or None + path : str A folder path. + """ - return [os.path.join(path,o) for o in os.listdir(path) if os.path.isdir(os.path.join(path,o))] + return [os.path.join(path, o) for o in os.listdir(path) if os.path.isdir(os.path.join(path, o))] + def exists_or_mkdir(path, verbose=True): """Check a folder by given name, if not exist, create the folder and return False, @@ -1475,101 +1641,106 @@ def exists_or_mkdir(path, verbose=True): Parameters ---------- - path : a string + path : str A folder path. verbose : boolean - If True, prints results, deaults is True + If True (default), prints results. Returns -------- - True if folder exist, otherwise, returns False and create the folder + boolean + True if folder already exist, otherwise, returns False and create the folder. Examples -------- >>> tl.files.exists_or_mkdir("checkpoints/train") + """ if not os.path.exists(path): if verbose: - print("[*] creates %s ..." % path) + logging.info("[*] creates %s ..." % path) os.makedirs(path) return False else: if verbose: - print("[!] %s exists ..." % path) + logging.info("[!] %s exists ..." % path) return True + def maybe_download_and_extract(filename, working_directory, url_source, extract=False, expected_bytes=None): """Checks if file exists in working_directory otherwise tries to dowload the file, and optionally also tries to extract the file if format is ".zip" or ".tar" Parameters ----------- - filename : string + filename : str The name of the (to be) dowloaded file. 
-    working_directory : string
+    working_directory : str
         A folder path to search for the file in and dowload the file to
-    url : string
+    url : str
         The URL to download the file from
-    extract : bool, defaults is False
-        If True, tries to uncompress the dowloaded file is ".tar.gz/.tar.bz2" or ".zip" file
-    expected_bytes : int/None
-        If set tries to verify that the downloaded file is of the specified size, otherwise raises an Exception,
-        defaults is None which corresponds to no check being performed
+    extract : boolean
+        If True, tries to uncompress the downloaded file if it is a ".tar.gz/.tar.bz2" or ".zip" file, default is False.
+    expected_bytes : int or None
+        If set, tries to verify that the downloaded file is of the specified size, otherwise raises an Exception; default is None, which corresponds to no check being performed.

     Returns
     ----------
-        filepath to dowloaded (uncompressed) file
+    str
+        File path of the downloaded (uncompressed) file.

     Examples
     --------
-    >>> down_file = tl.files.maybe_download_and_extract(filename = 'train-images-idx3-ubyte.gz',
-                                                        working_directory = 'data/',
-                                                        url_source = 'http://yann.lecun.com/exdb/mnist/')
-    >>> tl.files.maybe_download_and_extract(filename = 'ADEChallengeData2016.zip',
-                                            working_directory = 'data/',
-                                            url_source = 'http://sceneparsing.csail.mit.edu/data/',
-                                            extract=True)
+    >>> down_file = tl.files.maybe_download_and_extract(filename='train-images-idx3-ubyte.gz',
+    ...                                                 working_directory='data/',
+    ...                                                 url_source='http://yann.lecun.com/exdb/mnist/')
+    >>> tl.files.maybe_download_and_extract(filename='ADEChallengeData2016.zip',
+    ...                                     working_directory='data/',
+    ...                                     url_source='http://sceneparsing.csail.mit.edu/data/',
+    ...                                     extract=True)
+
     """
+    # We first define a download function, supporting both Python 2 and 3.
    def _download(filename, working_directory, url_source):
         def _dlProgress(count, blockSize, totalSize):
-            if(totalSize != 0):
+            if (totalSize != 0):
                 percent = float(count * blockSize) / float(totalSize) * 100.0
                 sys.stdout.write("\r" "Downloading " + filename + "...%d%%" % percent)
                 sys.stdout.flush()
+
         if sys.version_info[0] == 2:
             from urllib import urlretrieve
         else:
             from urllib.request import urlretrieve
         filepath = os.path.join(working_directory, filename)
-        urlretrieve(url_source+filename, filepath, reporthook=_dlProgress)
+        urlretrieve(url_source + filename, filepath, reporthook=_dlProgress)
+        sys.stdout.write('\n')

     exists_or_mkdir(working_directory, verbose=False)
     filepath = os.path.join(working_directory, filename)

     if not os.path.exists(filepath):
         _download(filename, working_directory, url_source)
-        print()
         statinfo = os.stat(filepath)
-        print('Succesfully downloaded %s %s bytes.' % (filename, statinfo.st_size))#, 'bytes.')
-        if(not(expected_bytes is None) and (expected_bytes != statinfo.st_size)):
+        logging.info('Successfully downloaded %s %s bytes.' % (filename, statinfo.st_size))
+        if (not (expected_bytes is None) and (expected_bytes != statinfo.st_size)):
             raise Exception('Failed to verify ' + filename + '. Can you get to it with a browser?')
-        if(extract):
+        if (extract):
             if tarfile.is_tarfile(filepath):
-                print('Trying to extract tar file')
+                logging.info('Trying to extract tar file')
                 tarfile.open(filepath, 'r').extractall(working_directory)
-                print('... Success!')
+                logging.info('... Success!')
             elif zipfile.is_zipfile(filepath):
-                print('Trying to extract zip file')
+                logging.info('Trying to extract zip file')
                 with zipfile.ZipFile(filepath) as zf:
                     zf.extractall(working_directory)
-                print('... Success!')
+                logging.info('... Success!')
             else:
-                print("Unknown compression_format only .tar.gz/.tar.bz2/.tar and .zip supported")
+                logging.info("Unknown compression format: only .tar.gz/.tar.bz2/.tar and .zip are supported")
     return filepath

-## Sort
 def natural_keys(text):
     """Sort list of string with number in human order.

@@ -1583,32 +1754,39 @@
     References
     ----------
-    alist.sort(key=natural_keys) sorts in human order
-    http://nedbatchelder.com/blog/200712/human_sorting.html
-    (See Toothy's implementation in the comments)
+    - `link <http://nedbatchelder.com/blog/200712/human_sorting.html>`__
+
     """
+
+    # - alist.sort(key=natural_keys) sorts in human order
+    # http://nedbatchelder.com/blog/200712/human_sorting.html
+    # (See Toothy's implementation in the comments)
     def atoi(text):
         return int(text) if text.isdigit() else text
-    return [ atoi(c) for c in re.split('(\d+)', text) ]
+
+    return [atoi(c) for c in re.split('(\d+)', text)]

+
 # Visualizing npz files
 def npz_to_W_pdf(path=None, regx='w1pre_[0-9]+\.(npz)'):
-    """Convert the first weight matrix of .npz file to .pdf by using tl.visualize.W().
+    r"""Convert the first weight matrix of `.npz` file to `.pdf` by using `tl.visualize.draw_weights()`.

     Parameters
     ----------
-    path : a string or None
-        A folder path to npz files.
-    regx : a string
+    path : str
+        A folder path to `npz` files.
+    regx : str
         Regx for the file name.

     Examples
-    --------
-    >>> Convert the first weight matrix of w1_pre...npz file to w1_pre...pdf.
+    ---------
+    Convert the first weight matrix of w1_pre...npz file to w1_pre...pdf.
+
     >>> tl.files.npz_to_W_pdf(path='/Users/.../npz_file/', regx='w1pre_[0-9]+\.(npz)')
+
     """
     file_list = load_file_list(path=path, regx=regx)
     for f in file_list:
         W = load_npz(path, f)[0]
-        print("%s --> %s" % (f, f.split('.')[0]+'.pdf'))
-        visualize.W(W, second=10, saveable=True, name=f.split('.')[0], fig_idx=2012)
+        logging.info("%s --> %s" % (f, f.split('.')[0] + '.pdf'))
+        visualize.draw_weights(W, second=10, saveable=True, name=f.split('.')[0], fig_idx=2012)
diff --git a/tensorlayer/iterate.py b/tensorlayer/iterate.py
index d6c973dc..5fd68643 100644
--- a/tensorlayer/iterate.py
+++ b/tensorlayer/iterate.py
@@ -1,21 +1,20 @@
 #! /usr/bin/python
 # -*- coding: utf-8 -*-
-
-
 import numpy as np
 from six.moves import xrange

+
 def minibatches(inputs=None, targets=None, batch_size=None, shuffle=False):
     """Generate a generator that input a group of example in numpy.array and
-    their labels, return the examples and labels by the given batchsize.
+    their labels, return the examples and labels by the given batch size.

     Parameters
     ----------
     inputs : numpy.array
-        (X) The input features, every row is a example.
+        The input features, every row is an example.
     targets : numpy.array
-        (y) The labels of inputs, every row is a example.
+        The labels of inputs, every row is an example.
     batch_size : int
         The batch size.
     shuffle : boolean
@@ -39,9 +38,10 @@

     Notes
-    -------
-    - If you have two inputs, e.g. X1 (1000, 100) and X2 (1000, 80), you can ``np.hstack((X1, X2))
-    into (1000, 180) and feed into ``inputs``, then you can split a batch of X1 and X2.
+    -----
+    If you have two inputs and one label and want to shuffle them together, e.g. X1 (1000, 100), X2 (1000, 80) and Y (1000, 1), you can stack X1 and X2 together (`np.hstack((X1, X2))`)
+    into (1000, 180) and feed the result to ``inputs``. After getting a batch, you can split it back into X1 and X2.
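+
+    A sketch of that trick (shapes and variable names are illustrative):
+
+    >>> X = np.hstack((X1, X2))  # (1000, 180)
+    >>> for batch in tl.iterate.minibatches(inputs=X, targets=Y, batch_size=50, shuffle=True):
+    >>>     inputs, labels = batch
+    >>>     x1, x2 = inputs[:, :100], inputs[:, 100:]  # recover the X1 and X2 parts of the batch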
+ """ assert len(inputs) == len(targets) if shuffle: @@ -54,13 +54,28 @@ def minibatches(inputs=None, targets=None, batch_size=None, shuffle=False): excerpt = slice(start_idx, start_idx + batch_size) yield inputs[excerpt], targets[excerpt] + def seq_minibatches(inputs, targets, batch_size, seq_length, stride=1): """Generate a generator that return a batch of sequence inputs and targets. - If ``batch_size = 100, seq_length = 5``, one return will have ``500`` rows (examples). + If `batch_size=100` and `seq_length=5`, one return will have 500 rows (examples). + + Parameters + ---------- + inputs : numpy.array + The input features, every row is a example. + targets : numpy.array + The labels of inputs, every element is a example. + batch_size : int + The batch size. + seq_length : int + The sequence length. + stride : int + The stride step, default is 1. Examples -------- - - Synced sequence input and output. + Synced sequence input and output. + >>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']]) >>> y = np.asarray([0, 1, 2, 3, 4, 5]) >>> for batch in tl.iterate.seq_minibatches(inputs=X, targets=y, batch_size=2, seq_length=2, stride=1): @@ -78,7 +93,8 @@ def seq_minibatches(inputs, targets, batch_size, seq_length, stride=1): ... ... - - Many to One + Many to One + >>> return_last = True >>> num_steps = 2 >>> X = np.asarray([['a','a'], ['b','b'], ['c','c'], ['d','d'], ['e','e'], ['f','f']]) @@ -97,52 +113,38 @@ def seq_minibatches(inputs, targets, batch_size, seq_length, stride=1): ... ['d' 'd'] ... ['d' 'd'] ... ['e' 'e']] [3 4] + """ assert len(inputs) == len(targets) n_loads = (batch_size * stride) + (seq_length - stride) for start_idx in range(0, len(inputs) - n_loads + 1, (batch_size * stride)): - seq_inputs = np.zeros((batch_size, seq_length) + inputs.shape[1:], - dtype=inputs.dtype) - seq_targets = np.zeros((batch_size, seq_length) + targets.shape[1:], - dtype=targets.dtype) + seq_inputs = np.zeros((batch_size, seq_length) + inputs.shape[1:], dtype=inputs.dtype) + seq_targets = np.zeros((batch_size, seq_length) + targets.shape[1:], dtype=targets.dtype) for b_idx in xrange(batch_size): start_seq_idx = start_idx + (b_idx * stride) end_seq_idx = start_seq_idx + seq_length seq_inputs[b_idx] = inputs[start_seq_idx:end_seq_idx] seq_targets[b_idx] = targets[start_seq_idx:end_seq_idx] - flatten_inputs = seq_inputs.reshape((-1,) + inputs.shape[1:]) - flatten_targets = seq_targets.reshape((-1,) + targets.shape[1:]) + flatten_inputs = seq_inputs.reshape((-1, ) + inputs.shape[1:]) + flatten_targets = seq_targets.reshape((-1, ) + targets.shape[1:]) yield flatten_inputs, flatten_targets + def seq_minibatches2(inputs, targets, batch_size, num_steps): """Generate a generator that iterates on two list of words. Yields (Returns) the source contexts and - the target context by the given batch_size and num_steps (sequence_length), - see ``PTB tutorial``. In TensorFlow's tutorial, this generates the batch_size pointers into the raw - PTB data, and allows minibatch iteration along these pointers. - - - Hint, if the input data are images, you can modify the code as follow. - - .. code-block:: python - - from - data = np.zeros([batch_size, batch_len) - to - data = np.zeros([batch_size, batch_len, inputs.shape[1], inputs.shape[2], inputs.shape[3]]) + the target context by the given batch_size and num_steps (sequence_length). + In TensorFlow's tutorial, this generates the `batch_size` pointers into the raw PTB data, and allows minibatch iteration along these pointers. 
Parameters ---------- - inputs : a list - the context in list format; note that context usually be - represented by splitting by space, and then convert to unique - word IDs. - targets : a list - the context in list format; note that context usually be - represented by splitting by space, and then convert to unique - word IDs. + inputs : list of data + The context in list format; note that context usually be represented by splitting by space, and then convert to unique word IDs. + targets : list of data + The context in list format; note that context usually be represented by splitting by space, and then convert to unique word IDs. batch_size : int - the batch size. + The batch size. num_steps : int - the number of unrolls. i.e. sequence_length + The number of unrolls. i.e. sequence length Yields ------ @@ -175,16 +177,15 @@ def seq_minibatches2(inputs, targets, batch_size, num_steps): ... [[ 26. 27. 28.] ... [ 36. 37. 38.]] - Code References - --------------- - - ``tensorflow/models/rnn/ptb/reader.py`` + Notes + ----- + - Hint, if the input data are images, you can modify the source code `data = np.zeros([batch_size, batch_len)` to `data = np.zeros([batch_size, batch_len, inputs.shape[1], inputs.shape[2], inputs.shape[3]])`. """ assert len(inputs) == len(targets) data_len = len(inputs) batch_len = data_len // batch_size # data = np.zeros([batch_size, batch_len]) - data = np.zeros((batch_size, batch_len) + inputs.shape[1:], - dtype=inputs.dtype) + data = np.zeros((batch_size, batch_len) + inputs.shape[1:], dtype=inputs.dtype) data2 = np.zeros([batch_size, batch_len]) for i in range(batch_size): @@ -197,21 +198,16 @@ def seq_minibatches2(inputs, targets, batch_size, num_steps): raise ValueError("epoch_size == 0, decrease batch_size or num_steps") for i in range(epoch_size): - x = data[:, i*num_steps:(i+1)*num_steps] - x2 = data2[:, i*num_steps:(i+1)*num_steps] + x = data[:, i * num_steps:(i + 1) * num_steps] + x2 = data2[:, i * num_steps:(i + 1) * num_steps] yield (x, x2) def ptb_iterator(raw_data, batch_size, num_steps): - """ - Generate a generator that iterates on a list of words, see PTB tutorial. Yields (Returns) the source contexts and - the target context by the given batch_size and num_steps (sequence_length).\n - see ``PTB tutorial``. - - e.g. x = [0, 1, 2] y = [1, 2, 3] , when batch_size = 1, num_steps = 3, - raw_data = [i for i in range(100)] + """Generate a generator that iterates on a list of words, see `PTB example `__. + Yields the source contexts and the target context by the given batch_size and num_steps (sequence_length). - In TensorFlow's tutorial, this generates batch_size pointers into the raw + In TensorFlow's tutorial, this generates `batch_size` pointers into the raw PTB data, and allows minibatch iteration along these pointers. Parameters @@ -255,10 +251,6 @@ def ptb_iterator(raw_data, batch_size, num_steps): ... [16 17 18]] ... [[ 7 8 9] ... 
[17 18 19]] - - Code References - ---------------- - - ``tensorflow/models/rnn/ptb/reader.py`` """ raw_data = np.array(raw_data, dtype=np.int32) @@ -274,160 +266,6 @@ def ptb_iterator(raw_data, batch_size, num_steps): raise ValueError("epoch_size == 0, decrease batch_size or num_steps") for i in range(epoch_size): - x = data[:, i*num_steps:(i+1)*num_steps] - y = data[:, i*num_steps+1:(i+1)*num_steps+1] + x = data[:, i * num_steps:(i + 1) * num_steps] + y = data[:, i * num_steps + 1:(i + 1) * num_steps + 1] yield (x, y) - - - -# def minibatches_for_sequence2D(inputs, targets, batch_size, sequence_length, stride=1): -# """ -# Input a group of example in 2D numpy.array and their labels. -# Return the examples and labels by the given batchsize, sequence_length. -# Use for RNN. -# -# Parameters -# ---------- -# inputs : numpy.array -# (X) The input features, every row is a example. -# targets : numpy.array -# (y) The labels of inputs, every row is a example. -# batchsize : int -# The batch size must be a multiple of sequence_length: int(batch_size % sequence_length) == 0 -# sequence_length : int -# The sequence length -# stride : int -# The stride step -# -# Examples -# -------- -# >>> sequence_length = 2 -# >>> batch_size = 4 -# >>> stride = 1 -# >>> X_train = np.asarray([[1,2,3],[4,5,6],[7,8,9],[10,11,12],[13,14,15],[16,17,18],[19,20,21],[22,23,24]]) -# >>> y_train = np.asarray(['0','1','2','3','4','5','6','7']) -# >>> print('X_train = %s' % X_train) -# >>> print('y_train = %s' % y_train) -# >>> for batch in minibatches_for_sequence2D(X_train, y_train, batch_size=batch_size, sequence_length=sequence_length, stride=stride): -# >>> inputs, targets = batch -# >>> print(inputs) -# >>> print(targets) -# ... [[ 1. 2. 3.] -# ... [ 4. 5. 6.] -# ... [ 4. 5. 6.] -# ... [ 7. 8. 9.]] -# ... [1 2] -# ... [[ 4. 5. 6.] -# ... [ 7. 8. 9.] -# ... [ 7. 8. 9.] -# ... [ 10. 11. 12.]] -# ... [2 3] -# ... ... -# ... [[ 16. 17. 18.] -# ... [ 19. 20. 21.] -# ... [ 19. 20. 21.] -# ... [ 22. 23. 24.]] -# ... [6 7] -# """ -# print('len(targets)=%d batch_size=%d sequence_length=%d stride=%d' % (len(targets), batch_size, sequence_length, stride)) -# assert len(inputs) == len(targets), '1 feature vector have 1 target vector/value' #* sequence_length -# # assert int(batch_size % sequence_length) == 0, 'batch_size % sequence_length must == 0\ -# # batch_size is number of examples rather than number of targets' -# -# # print(inputs.shape, len(inputs), len(inputs[0])) -# -# n_targets = int(batch_size/sequence_length) -# # n_targets = int(np.ceil(batch_size/sequence_length)) -# X = np.empty(shape=(0,len(inputs[0])), dtype=np.float32) -# y = np.zeros(shape=(1, n_targets), dtype=np.int32) -# -# for idx in range(sequence_length, len(inputs), stride): # go through all example during 1 epoch -# for n in range(n_targets): # for num of target -# X = np.concatenate((X, inputs[idx-sequence_length+n:idx+n])) -# y[0][n] = targets[idx-1+n] -# # y = np.vstack((y, targets[idx-1+n])) -# yield X, y[0] -# X = np.empty(shape=(0,len(inputs[0]))) -# # y = np.empty(shape=(1,0)) -# -# -# def minibatches_for_sequence4D(inputs, targets, batch_size, sequence_length, stride=1): # -# """ -# Input a group of example in 4D numpy.array and their labels. -# Return the examples and labels by the given batchsize, sequence_length. -# Use for RNN. -# -# Parameters -# ---------- -# inputs : numpy.array -# (X) The input features, every row is a example. -# targets : numpy.array -# (y) The labels of inputs, every row is a example. 
-# batchsize : int -# The batch size must be a multiple of sequence_length: int(batch_size % sequence_length) == 0 -# sequence_length : int -# The sequence length -# stride : int -# The stride step -# -# Examples -# -------- -# >>> sequence_length = 2 -# >>> batch_size = 2 -# >>> stride = 1 -# >>> X_train = np.asarray([[1,2,3],[4,5,6],[7,8,9],[10,11,12],[13,14,15],[16,17,18],[19,20,21],[22,23,24]]) -# >>> y_train = np.asarray(['0','1','2','3','4','5','6','7']) -# >>> X_train = np.expand_dims(X_train, axis=1) -# >>> X_train = np.expand_dims(X_train, axis=3) -# >>> for batch in minibatches_for_sequence4D(X_train, y_train, batch_size=batch_size, sequence_length=sequence_length, stride=stride): -# >>> inputs, targets = batch -# >>> print(inputs) -# >>> print(targets) -# ... [[[[ 1.] -# ... [ 2.] -# ... [ 3.]]] -# ... [[[ 4.] -# ... [ 5.] -# ... [ 6.]]]] -# ... [1] -# ... [[[[ 4.] -# ... [ 5.] -# ... [ 6.]]] -# ... [[[ 7.] -# ... [ 8.] -# ... [ 9.]]]] -# ... [2] -# ... ... -# ... [[[[ 19.] -# ... [ 20.] -# ... [ 21.]]] -# ... [[[ 22.] -# ... [ 23.] -# ... [ 24.]]]] -# ... [7] -# """ -# print('len(targets)=%d batch_size=%d sequence_length=%d stride=%d' % (len(targets), batch_size, sequence_length, stride)) -# assert len(inputs) == len(targets), '1 feature vector have 1 target vector/value' #* sequence_length -# # assert int(batch_size % sequence_length) == 0, 'in LSTM, batch_size % sequence_length must == 0\ -# # batch_size is number of X_train rather than number of targets' -# assert stride >= 1, 'stride must be >=1, at least move 1 step for each iternation' -# -# n_example, n_channels, width, height = inputs.shape -# print('n_example=%d n_channels=%d width=%d height=%d' % (n_example, n_channels, width, height)) -# -# n_targets = int(np.ceil(batch_size/sequence_length)) # 实际为 batchsize/sequence_length + 1 -# print(n_targets) -# X = np.zeros(shape=(batch_size, n_channels, width, height), dtype=np.float32) -# # X = np.zeros(shape=(n_targets, sequence_length, n_channels, width, height), dtype=np.float32) -# y = np.zeros(shape=(1,n_targets), dtype=np.int32) -# # y = np.empty(shape=(0,1), dtype=np.float32) -# # time.sleep(2) -# for idx in range(sequence_length, n_example-n_targets+2, stride): # go through all example during 1 epoch -# for n in range(n_targets): # for num of target -# # print(idx+n, inputs[idx-sequence_length+n : idx+n].shape) -# X[n*sequence_length : (n+1)*sequence_length] = inputs[idx+n-sequence_length : idx+n] -# # X[n] = inputs[idx-sequence_length+n:idx+n] -# y[0][n] = targets[idx+n-1] -# # y = np.vstack((y, targets[idx-1+n])) -# # y = targets[idx: idx+n_targets] -# yield X, y[0] diff --git a/tensorlayer/layers/__init__.py b/tensorlayer/layers/__init__.py new file mode 100644 index 00000000..cad53aab --- /dev/null +++ b/tensorlayer/layers/__init__.py @@ -0,0 +1,26 @@ +""" +TensorLayer provides rich layer implementations trailed for +various benchmarks and domain-specific problems. In addition, we also +support transparent access to native TensorFlow parameters. +For example, we provide not only layers for local response normalization, but also +layers that allow user to apply ``tf.nn.lrn`` on ``network.outputs``. +More functions can be found in `TensorFlow API `__. 
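+
+A minimal composition sketch (the layer names below are illustrative):
+
+>>> import tensorflow as tf
+>>> import tensorlayer as tl
+>>> x = tf.placeholder(tf.float32, (None, 784))
+>>> net = tl.layers.InputLayer(x, name='input')
+>>> net = tl.layers.DenseLayer(net, n_units=800, act=tf.nn.relu, name='dense1')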
+""" + +from .core import * +from .convolution import * +from .super_resolution import * +from .normalization import * +from .spatial_transformer import * +from .object_detection import * +from .time_distribution import * +from .pooling import * +from .padding import * +from .recurrent import * +from .shape import * +from .importer import * +from .merge import * +from .extend import * +from .stack import * +from .special_activation import * +from .flow_control import * diff --git a/tensorlayer/layers/convolution.py b/tensorlayer/layers/convolution.py new file mode 100644 index 00000000..a103739f --- /dev/null +++ b/tensorlayer/layers/convolution.py @@ -0,0 +1,1606 @@ +# -*- coding: utf-8 -*- + +import tensorflow as tf + +from .core import * + + +class Conv1dLayer(Layer): + """ + The :class:`Conv1dLayer` class is a 1D CNN layer, see `tf.nn.convolution `__. + + Parameters + ---------- + layer : :class:`Layer` + Previous layer. + act : activation function + The activation function of this layer. + shape : tuple of int + The shape of the filters: (filter_length, in_channels, out_channels). + stride : int + The number of entries by which the filter is moved right at a step. + dilation_rate : int + Filter up-sampling/input down-sampling rate. + padding : str + The padding algorithm type: "SAME" or "VALID". + data_format : str + Default is 'NWC' as it is a 1D CNN. + W_init : initializer + The initializer for the weight matrix. + b_init : initializer or None + The initializer for the bias vector. If None, skip biases. + W_init_args : dictionary + The arguments for the weight matrix initializer. + b_init_args : dictionary + The arguments for the bias vector initializer. + name : str + A unique layer name + + """ + + def __init__( + self, + layer, + act=tf.identity, + shape=(5, 1, 5), + stride=1, + dilation_rate=1, + padding='SAME', + data_format='NWC', + W_init=tf.truncated_normal_initializer(stddev=0.02), + b_init=tf.constant_initializer(value=0.0), + W_init_args={}, + b_init_args={}, + name='cnn1d', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + if act is None: + act = tf.identity + logging.info("Conv1dLayer %s: shape:%s stride:%s pad:%s act:%s" % (self.name, str(shape), str(stride), padding, act.__name__)) + + with tf.variable_scope(name) as vs: + W = tf.get_variable(name='W_conv1d', shape=shape, initializer=W_init, dtype=D_TYPE, **W_init_args) + self.outputs = tf.nn.convolution( + self.inputs, W, strides=(stride, ), padding=padding, dilation_rate=(dilation_rate, ), data_format=data_format) # 1.2 + if b_init: + b = tf.get_variable(name='b_conv1d', shape=(shape[-1]), initializer=b_init, dtype=D_TYPE, **b_init_args) + self.outputs = self.outputs + b + + self.outputs = act(self.outputs) + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) + if b_init: + self.all_params.extend([W, b]) + else: + self.all_params.extend([W]) + + +class Conv2dLayer(Layer): + """ + The :class:`Conv2dLayer` class is a 2D CNN layer, see `tf.nn.conv2d `__. + + Parameters + ---------- + layer : :class:`Layer` + Previous layer. + act : activation function + The activation function of this layer. + shape : tuple of int + The shape of the filters: (filter_height, filter_width, in_channels, out_channels). + strides : tuple of int + The sliding window strides of corresponding input dimensions. + It must be in the same order as the ``shape`` parameter. 
+ padding : str + The padding algorithm type: "SAME" or "VALID". + W_init : initializer + The initializer for the the weight matrix. + b_init : initializer or None + The initializer for the the bias vector. If None, skip biases. + W_init_args : dictionary + The arguments for the weight matrix initializer. + b_init_args : dictionary + The arguments for the bias vector initializer. + use_cudnn_on_gpu : bool + Default is False. + data_format : str + "NHWC" or "NCHW", default is "NHWC". + name : str + A unique layer name. + + Notes + ----- + - shape = [h, w, the number of output channel of previous layer, the number of output channels] + - the number of output channel of a layer is its last dimension. + + Examples + -------- + With TensorFlow + + >>> x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1)) + >>> net = tl.layers.InputLayer(x, name='input_layer') + >>> net = tl.layers.Conv2dLayer(net, + ... act = tf.nn.relu, + ... shape = (5, 5, 1, 32), # 32 features for each 5x5 patch + ... strides = (1, 1, 1, 1), + ... padding='SAME', + ... W_init=tf.truncated_normal_initializer(stddev=5e-2), + ... W_init_args={}, + ... b_init = tf.constant_initializer(value=0.0), + ... b_init_args = {}, + ... name ='cnn_layer1') # output: (?, 28, 28, 32) + >>> net = tl.layers.PoolLayer(net, + ... ksize=(1, 2, 2, 1), + ... strides=(1, 2, 2, 1), + ... padding='SAME', + ... pool = tf.nn.max_pool, + ... name ='pool_layer1',) # output: (?, 14, 14, 32) + + Without TensorLayer, you can implement 2d convolution as follow. + + >>> W = tf.Variable(W_init(shape=[5, 5, 1, 32], ), name='W_conv') + >>> b = tf.Variable(b_init(shape=[32], ), name='b_conv') + >>> outputs = tf.nn.relu( tf.nn.conv2d(inputs, W, + ... strides=[1, 1, 1, 1], + ... padding='SAME') + b ) + + """ + + def __init__( + self, + layer, + act=tf.identity, + shape=(5, 5, 1, 100), + strides=(1, 1, 1, 1), + padding='SAME', + W_init=tf.truncated_normal_initializer(stddev=0.02), + b_init=tf.constant_initializer(value=0.0), + W_init_args={}, + b_init_args={}, + use_cudnn_on_gpu=None, + data_format=None, + name='cnn_layer', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + if act is None: + act = tf.identity + logging.info("Conv2dLayer %s: shape:%s strides:%s pad:%s act:%s" % (self.name, str(shape), str(strides), padding, act.__name__)) + + with tf.variable_scope(name) as vs: + W = tf.get_variable(name='W_conv2d', shape=shape, initializer=W_init, dtype=D_TYPE, **W_init_args) + if b_init: + b = tf.get_variable(name='b_conv2d', shape=(shape[-1]), initializer=b_init, dtype=D_TYPE, **b_init_args) + self.outputs = act( + tf.nn.conv2d(self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format) + b) + else: + self.outputs = act(tf.nn.conv2d(self.inputs, W, strides=strides, padding=padding, use_cudnn_on_gpu=use_cudnn_on_gpu, data_format=data_format)) + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) + if b_init: + self.all_params.extend([W, b]) + else: + self.all_params.extend([W]) + + +class DeConv2dLayer(Layer): + """A de-convolution 2D layer. + + See `tf.nn.conv2d_transpose `__. + + Parameters + ---------- + layer : :class:`Layer` + Previous layer. + act : activation function + The activation function of this layer. + shape : tuple of int + Shape of the filters: (height, width, output_channels, in_channels). + The filter's ``in_channels`` dimension must match that of value. 
+ output_shape : tuple of int + Output shape of the deconvolution, + strides : tuple of int + The sliding window strides for corresponding input dimensions. + padding : str + The padding algorithm type: "SAME" or "VALID". + W_init : initializer + The initializer for the weight matrix. + b_init : initializer or None + The initializer for the bias vector. If None, skip biases. + W_init_args : dictionary + The arguments for initializing the weight matrix. + b_init_args : dictionary + The arguments for initializing the bias vector. + name : str + A unique layer name. + + Notes + ----- + - We recommend to use `DeConv2d` with TensorFlow version higher than 1.3. + - shape = [h, w, the number of output channels of this layer, the number of output channel of the previous layer]. + - output_shape = [batch_size, any, any, the number of output channels of this layer]. + - the number of output channel of a layer is its last dimension. + + Examples + -------- + A part of the generator in DCGAN example + + >>> batch_size = 64 + >>> inputs = tf.placeholder(tf.float32, [batch_size, 100], name='z_noise') + >>> net_in = tl.layers.InputLayer(inputs, name='g/in') + >>> net_h0 = tl.layers.DenseLayer(net_in, n_units = 8192, + ... W_init = tf.random_normal_initializer(stddev=0.02), + ... act = tf.identity, name='g/h0/lin') + >>> print(net_h0.outputs._shape) + ... (64, 8192) + >>> net_h0 = tl.layers.ReshapeLayer(net_h0, shape=(-1, 4, 4, 512), name='g/h0/reshape') + >>> net_h0 = tl.layers.BatchNormLayer(net_h0, act=tf.nn.relu, is_train=is_train, name='g/h0/batch_norm') + >>> print(net_h0.outputs._shape) + ... (64, 4, 4, 512) + >>> net_h1 = tl.layers.DeConv2dLayer(net_h0, + ... shape=(5, 5, 256, 512), + ... output_shape=(batch_size, 8, 8, 256), + ... strides=(1, 2, 2, 1), + ... act=tf.identity, name='g/h1/decon2d') + >>> net_h1 = tl.layers.BatchNormLayer(net_h1, act=tf.nn.relu, is_train=is_train, name='g/h1/batch_norm') + >>> print(net_h1.outputs._shape) + ... (64, 8, 8, 256) + + U-Net + + >>> .... + >>> conv10 = tl.layers.Conv2dLayer(conv9, act=tf.nn.relu, + ... shape=(3,3,1024,1024), strides=(1,1,1,1), padding='SAME', + ... W_init=w_init, b_init=b_init, name='conv10') + >>> print(conv10.outputs) + ... (batch_size, 32, 32, 1024) + >>> deconv1 = tl.layers.DeConv2dLayer(conv10, act=tf.nn.relu, + ... shape=(3,3,512,1024), strides=(1,2,2,1), output_shape=(batch_size,64,64,512), + ... 
padding='SAME', W_init=w_init, b_init=b_init, name='devcon1_1') + + """ + + def __init__( + self, + layer, + act=tf.identity, + shape=(3, 3, 128, 256), + output_shape=(1, 256, 256, 128), + strides=(1, 2, 2, 1), + padding='SAME', + W_init=tf.truncated_normal_initializer(stddev=0.02), + b_init=tf.constant_initializer(value=0.0), + W_init_args={}, + b_init_args={}, + name='decnn2d_layer', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + if act is None: + act = tf.identity + logging.info("DeConv2dLayer %s: shape:%s out_shape:%s strides:%s pad:%s act:%s" % (self.name, str(shape), str(output_shape), str(strides), padding, + act.__name__)) + # logging.info(" DeConv2dLayer: Untested") + with tf.variable_scope(name) as vs: + W = tf.get_variable(name='W_deconv2d', shape=shape, initializer=W_init, dtype=D_TYPE, **W_init_args) + if b_init: + b = tf.get_variable(name='b_deconv2d', shape=(shape[-2]), initializer=b_init, dtype=D_TYPE, **b_init_args) + self.outputs = act(tf.nn.conv2d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + b) + else: + self.outputs = act(tf.nn.conv2d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding)) + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) + if b_init: + self.all_params.extend([W, b]) + else: + self.all_params.extend([W]) + + +class Conv3dLayer(Layer): + """ + The :class:`Conv3dLayer` class is a 3D CNN layer, see `tf.nn.conv3d `__. + + Parameters + ---------- + layer : :class:`Layer` + Previous layer. + act : activation function + The activation function of this layer. + shape : tuple of int + Shape of the filters: (filter_depth, filter_height, filter_width, in_channels, out_channels). + strides : tuple of int + The sliding window strides for corresponding input dimensions. + Must be in the same order as the shape dimension. + padding : str + The padding algorithm type: "SAME" or "VALID". + W_init : initializer + The initializer for the weight matrix. + b_init : initializer + The initializer for the bias vector. + W_init_args : dictionary + The arguments for the weight matrix initializer. + b_init_args : dictionary + The arguments for the bias vector initializer. + name : str + A unique layer name. 
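+
+    Examples
+    --------
+    A minimal sketch (shapes are illustrative), e.g. for a stack of 16 frames of 32x32 feature maps with 64 channels:
+
+    >>> x = tf.placeholder(tf.float32, (None, 16, 32, 32, 64))
+    >>> net = tl.layers.InputLayer(x, name='in')
+    >>> net = tl.layers.Conv3dLayer(net, act=tf.nn.relu,
+    ...                  shape=(2, 2, 2, 64, 128), strides=(1, 2, 2, 2, 1),
+    ...                  padding='SAME', name='cnn3d')  # output: (?, 8, 16, 16, 128)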
+ + """ + + def __init__( + self, + layer, + act=tf.identity, + shape=(2, 2, 2, 64, 128), + strides=(1, 2, 2, 2, 1), + padding='SAME', + W_init=tf.truncated_normal_initializer(stddev=0.02), + b_init=tf.constant_initializer(value=0.0), + W_init_args={}, + b_init_args={}, + name='cnn3d_layer', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + if act is None: + act = tf.identity + logging.info("Conv3dLayer %s: shape:%s strides:%s pad:%s act:%s" % (self.name, str(shape), str(strides), padding, act.__name__)) + + with tf.variable_scope(name) as vs: + # W = tf.Variable(W_init(shape=shape, **W_init_args), name='W_conv') + # b = tf.Variable(b_init(shape=[shape[-1]], **b_init_args), name='b_conv') + W = tf.get_variable(name='W_conv3d', shape=shape, initializer=W_init, dtype=D_TYPE, **W_init_args) + b = tf.get_variable(name='b_conv3d', shape=(shape[-1]), initializer=b_init, dtype=D_TYPE, **b_init_args) + self.outputs = act(tf.nn.conv3d(self.inputs, W, strides=strides, padding=padding, name=None) + b) + + # self.outputs = act( tf.nn.conv3d(self.inputs, W, strides=strides, padding=padding, name=None) + b ) + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) + self.all_params.extend([W, b]) + + +class DeConv3dLayer(Layer): + """The :class:`DeConv3dLayer` class is deconvolutional 3D layer, see `tf.nn.conv3d_transpose `__. + + Parameters + ---------- + layer : :class:`Layer` + Previous layer. + act : activation function + The activation function of this layer. + shape : tuple of int + The shape of the filters: (depth, height, width, output_channels, in_channels). + The filter's in_channels dimension must match that of value. + output_shape : tuple of int + The output shape of the deconvolution. + strides : tuple of int + The sliding window strides for corresponding input dimensions. + padding : str + The padding algorithm type: "SAME" or "VALID". + W_init : initializer + The initializer for the weight matrix. + b_init : initializer + The initializer for the bias vector. + W_init_args : dictionary + The arguments for the weight matrix initializer. + b_init_args : dictionary + The arguments for the bias vector initializer. + name : str + A unique layer name. 
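+
+    Examples
+    --------
+    A minimal sketch (shapes are illustrative); with stride 2, the output volume doubles each spatial dimension of the input:
+
+    >>> x = tf.placeholder(tf.float32, (1, 6, 16, 16, 256))
+    >>> net = tl.layers.InputLayer(x, name='in')
+    >>> net = tl.layers.DeConv3dLayer(net,
+    ...                  shape=(2, 2, 2, 128, 256),
+    ...                  output_shape=(1, 12, 32, 32, 128),
+    ...                  strides=(1, 2, 2, 2, 1), name='decnn3d')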
+ + """ + + def __init__( + self, + layer, + act=tf.identity, + shape=(2, 2, 2, 128, 256), + output_shape=(1, 12, 32, 32, 128), + strides=(1, 2, 2, 2, 1), + padding='SAME', + W_init=tf.truncated_normal_initializer(stddev=0.02), + b_init=tf.constant_initializer(value=0.0), + W_init_args={}, + b_init_args={}, + name='decnn3d_layer', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + if act is None: + act = tf.identity + logging.info("DeConv3dLayer %s: shape:%s out_shape:%s strides:%s pad:%s act:%s" % (self.name, str(shape), str(output_shape), str(strides), padding, + act.__name__)) + + with tf.variable_scope(name) as vs: + W = tf.get_variable(name='W_deconv3d', shape=shape, initializer=W_init, dtype=D_TYPE, **W_init_args) + b = tf.get_variable(name='b_deconv3d', shape=(shape[-2]), initializer=b_init, dtype=D_TYPE, **b_init_args) + + self.outputs = act(tf.nn.conv3d_transpose(self.inputs, W, output_shape=output_shape, strides=strides, padding=padding) + b) + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) + self.all_params.extend([W, b]) + + +class UpSampling2dLayer(Layer): + """The :class:`UpSampling2dLayer` class is a up-sampling 2D layer, see `tf.image.resize_images `__. + + Parameters + ---------- + layer : :class:`Layer` + Previous layer with 4-D Tensor of the shape (batch, height, width, channels) or 3-D Tensor of the shape (height, width, channels). + size : tuple of int/float + (height, width) scale factor or new size of height and width. + is_scale : boolean + If True (default), the `size` is a scale factor; otherwise, the `size` is the numbers of pixels of height and width. + method : int + The resize method selected through the index. Defaults index is 0 which is ResizeMethod.BILINEAR. + - Index 0 is ResizeMethod.BILINEAR, Bilinear interpolation. + - Index 1 is ResizeMethod.NEAREST_NEIGHBOR, Nearest neighbor interpolation. + - Index 2 is ResizeMethod.BICUBIC, Bicubic interpolation. + - Index 3 ResizeMethod.AREA, Area interpolation. + align_corners : boolean + If True, align the corners of the input and output. Default is False. + name : str + A unique layer name. 
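+
+    Examples
+    --------
+    A minimal sketch (shapes are illustrative): double the height and width.
+
+    >>> x = tf.placeholder(tf.float32, (None, 32, 32, 3))
+    >>> net = tl.layers.InputLayer(x, name='in')
+    >>> net = tl.layers.UpSampling2dLayer(net, size=(2, 2), is_scale=True, name='up2d')  # output: (?, 64, 64, 3)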
+ + """ + + def __init__( + self, + layer, + size, + is_scale=True, + method=0, + align_corners=False, + name='upsample2d_layer', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + if len(self.inputs.get_shape()) == 3: + if is_scale: + size_h = size[0] * int(self.inputs.get_shape()[0]) + size_w = size[1] * int(self.inputs.get_shape()[1]) + size = [int(size_h), int(size_w)] + elif len(self.inputs.get_shape()) == 4: + if is_scale: + size_h = size[0] * int(self.inputs.get_shape()[1]) + size_w = size[1] * int(self.inputs.get_shape()[2]) + size = [int(size_h), int(size_w)] + else: + raise Exception("Donot support shape %s" % self.inputs.get_shape()) + logging.info("UpSampling2dLayer %s: is_scale:%s size:%s method:%d align_corners:%s" % (name, is_scale, size, method, align_corners)) + with tf.variable_scope(name) as vs: + try: + self.outputs = tf.image.resize_images(self.inputs, size=size, method=method, align_corners=align_corners) + except: # for TF 0.10 + self.outputs = tf.image.resize_images(self.inputs, new_height=size[0], new_width=size[1], method=method, align_corners=align_corners) + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) + + +class DownSampling2dLayer(Layer): + """The :class:`DownSampling2dLayer` class is down-sampling 2D layer, see `tf.image.resize_images `__. + + Parameters + ---------- + layer : :class:`Layer` + Previous layer with 4-D Tensor in the shape of (batch, height, width, channels) or 3-D Tensor in the shape of (height, width, channels). + size : tuple of int/float + (height, width) scale factor or new size of height and width. + is_scale : boolean + If True (default), the `size` is the scale factor; otherwise, the `size` are numbers of pixels of height and width. + method : int + The resize method selected through the index. Defaults index is 0 which is ResizeMethod.BILINEAR. + - Index 0 is ResizeMethod.BILINEAR, Bilinear interpolation. + - Index 1 is ResizeMethod.NEAREST_NEIGHBOR, Nearest neighbor interpolation. + - Index 2 is ResizeMethod.BICUBIC, Bicubic interpolation. + - Index 3 ResizeMethod.AREA, Area interpolation. + align_corners : boolean + If True, exactly align all 4 corners of the input and output. Default is False. + name : str + A unique layer name. 
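+
+    Examples
+    --------
+    A minimal sketch (shapes are illustrative): resize to a fixed 32 x 32 map by passing pixel sizes instead of scale factors.
+
+    >>> x = tf.placeholder(tf.float32, (None, 64, 64, 3))
+    >>> net = tl.layers.InputLayer(x, name='in')
+    >>> net = tl.layers.DownSampling2dLayer(net, size=(32, 32), is_scale=False, name='down2d')  # output: (?, 32, 32, 3)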
+ + """ + + def __init__( + self, + layer, + size, + is_scale=True, + method=0, + align_corners=False, + name='downsample2d_layer', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + if len(self.inputs.get_shape()) == 3: + if is_scale: + size_h = size[0] * int(self.inputs.get_shape()[0]) + size_w = size[1] * int(self.inputs.get_shape()[1]) + size = [int(size_h), int(size_w)] + elif len(self.inputs.get_shape()) == 4: + if is_scale: + size_h = size[0] * int(self.inputs.get_shape()[1]) + size_w = size[1] * int(self.inputs.get_shape()[2]) + size = [int(size_h), int(size_w)] + else: + raise Exception("Donot support shape %s" % self.inputs.get_shape()) + logging.info("DownSampling2dLayer %s: is_scale:%s size:%s method:%d, align_corners:%s" % (name, is_scale, size, method, align_corners)) + with tf.variable_scope(name) as vs: + try: + self.outputs = tf.image.resize_images(self.inputs, size=size, method=method, align_corners=align_corners) + except: # for TF 0.10 + self.outputs = tf.image.resize_images(self.inputs, new_height=size[0], new_width=size[1], method=method, align_corners=align_corners) + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) + + +class DeformableConv2dLayer(Layer): + """The :class:`DeformableConv2dLayer` class is a 2D + `Deformable Convolutional Networks `__. + """ + + def __init__(self, + layer, + act=tf.identity, + offset_layer=None, + shape=(3, 3, 1, 100), + name='deformable_conv_2d_layer', + W_init=tf.truncated_normal_initializer(stddev=0.02), + b_init=tf.constant_initializer(value=0.0), + W_init_args={}, + b_init_args={}): + raise Exception("deprecated, use DeformableConv2d instead") + + +class DeformableConv2d(Layer): + """The :class:`DeformableConv2d` class is a 2D + `Deformable Convolutional Networks `__. + + Parameters + ---------- + layer : :class:`Layer` + Previous layer. + offset_layer : :class:`Layer` + To predict the offset of convolution operations. + The output shape is (batchsize, input height, input width, 2*(number of element in the convolution kernel)) + e.g. if apply a 3*3 kernel, the number of the last dimension should be 18 (2*3*3) + n_filter : int + The number of filters. + filter_size : tuple of int + The filter size (height, width). + act : activation function + The activation function of this layer. + W_init : initializer + The initializer for the weight matrix. + b_init : initializer or None + The initializer for the bias vector. If None, skip biases. + W_init_args : dictionary + The arguments for the weight matrix initializer. + b_init_args : dictionary + The arguments for the bias vector initializer. + name : str + A unique layer name. + + Examples + -------- + >>> net = tl.layers.InputLayer(x, name='input_layer') + >>> offset1 = tl.layers.Conv2d(net, 18, (3, 3), (1, 1), act=act, padding='SAME', name='offset1') + >>> net = tl.layers.DeformableConv2d(net, offset1, 32, (3, 3), act=act, name='deformable1') + >>> offset2 = tl.layers.Conv2d(net, 18, (3, 3), (1, 1), act=act, padding='SAME', name='offset2') + >>> net = tl.layers.DeformableConv2d(net, offset2, 64, (3, 3), act=act, name='deformable2') + + References + ---------- + - The deformation operation was adapted from the implementation in `here `__ + + Notes + ----- + - The padding is fixed to 'SAME'. + - The current implementation is not optimized for memory usgae. Please use it carefully. 
+ + """ + + # >>> net = tl.layers.InputLayer(x, name='input_layer') + # >>> offset_1 = tl.layers.Conv2dLayer(layer=net, act=act, shape=(3, 3, 3, 18), strides=(1, 1, 1, 1),padding='SAME', name='offset_layer1') + # >>> net = tl.layers.DeformableConv2dLayer(layer=net, act=act, offset_layer=offset_1, shape=(3, 3, 3, 32), name='deformable_conv_2d_layer1') + # >>> offset_2 = tl.layers.Conv2dLayer(layer=net, act=act, shape=(3, 3, 32, 18), strides=(1, 1, 1, 1), padding='SAME', name='offset_layer2') + # >>> net = tl.layers.DeformableConv2dLayer(layer=net, act=act, offset_layer=offset_2, shape=(3, 3, 32, 64), name='deformable_conv_2d_layer2') + def __init__( + self, + layer, + offset_layer=None, + # shape=(3, 3, 1, 100), + n_filter=32, + filter_size=(3, 3), + act=tf.identity, + name='deformable_conv_2d', + W_init=tf.truncated_normal_initializer(stddev=0.02), + b_init=tf.constant_initializer(value=0.0), + W_init_args={}, + b_init_args={}): + if tf.__version__ < "1.4": + raise Exception("Deformable CNN layer requires tensrflow 1.4 or higher version | current version %s" % tf.__version__) + + def _to_bc_h_w(x, x_shape): + """(b, h, w, c) -> (b*c, h, w)""" + x = tf.transpose(x, [0, 3, 1, 2]) + x = tf.reshape(x, (-1, x_shape[1], x_shape[2])) + return x + + def _to_b_h_w_n_c(x, x_shape): + """(b*c, h, w, n) -> (b, h, w, n, c)""" + x = tf.reshape(x, (-1, x_shape[4], x_shape[1], x_shape[2], x_shape[3])) + x = tf.transpose(x, [0, 2, 3, 4, 1]) + return x + + def tf_flatten(a): + """Flatten tensor""" + return tf.reshape(a, [-1]) + + def _get_vals_by_coords(inputs, coords, idx, out_shape): + indices = tf.stack([idx, tf_flatten(coords[:, :, :, :, 0]), tf_flatten(coords[:, :, :, :, 1])], axis=-1) + vals = tf.gather_nd(inputs, indices) + vals = tf.reshape(vals, out_shape) + return vals + + def _tf_repeat(a, repeats): + """Tensorflow version of np.repeat for 1D""" + # https://github.com/tensorflow/tensorflow/issues/8521 + assert len(a.get_shape()) == 1 + + a = tf.expand_dims(a, -1) + a = tf.tile(a, [1, repeats]) + a = tf_flatten(a) + return a + + def _tf_batch_map_coordinates(inputs, coords): + """Batch version of tf_map_coordinates + + Only supports 2D feature maps + + Parameters + ---------- + inputs : ``tf.Tensor`` + shape = (b*c, h, w) + coords : ``tf.Tensor`` + shape = (b*c, h, w, n, 2) + + Returns + ------- + ``tf.Tensor`` + A Tensor with the shape as (b*c, h, w, n) + + """ + input_shape = inputs.get_shape() + coords_shape = coords.get_shape() + batch_channel = tf.shape(inputs)[0] + input_h = int(input_shape[1]) + input_w = int(input_shape[2]) + kernel_n = int(coords_shape[3]) + n_coords = input_h * input_w * kernel_n + + coords_lt = tf.cast(tf.floor(coords), 'int32') + coords_rb = tf.cast(tf.ceil(coords), 'int32') + coords_lb = tf.stack([coords_lt[:, :, :, :, 0], coords_rb[:, :, :, :, 1]], axis=-1) + coords_rt = tf.stack([coords_rb[:, :, :, :, 0], coords_lt[:, :, :, :, 1]], axis=-1) + + idx = _tf_repeat(tf.range(batch_channel), n_coords) + + vals_lt = _get_vals_by_coords(inputs, coords_lt, idx, (batch_channel, input_h, input_w, kernel_n)) + vals_rb = _get_vals_by_coords(inputs, coords_rb, idx, (batch_channel, input_h, input_w, kernel_n)) + vals_lb = _get_vals_by_coords(inputs, coords_lb, idx, (batch_channel, input_h, input_w, kernel_n)) + vals_rt = _get_vals_by_coords(inputs, coords_rt, idx, (batch_channel, input_h, input_w, kernel_n)) + + coords_offset_lt = coords - tf.cast(coords_lt, 'float32') + + vals_t = vals_lt + (vals_rt - vals_lt) * coords_offset_lt[:, :, :, :, 0] + vals_b = vals_lb + (vals_rb - 
vals_lb) * coords_offset_lt[:, :, :, :, 0] + mapped_vals = vals_t + (vals_b - vals_t) * coords_offset_lt[:, :, :, :, 1] + + return mapped_vals + + def _tf_batch_map_offsets(inputs, offsets, grid_offset): + """Batch map offsets into input + + Parameters + ------------ + inputs : ``tf.Tensor`` + shape = (b, h, w, c) + offsets: ``tf.Tensor`` + shape = (b, h, w, 2*n) + grid_offset: `tf.Tensor`` + Offset grids shape = (h, w, n, 2) + + Returns + ------- + ``tf.Tensor`` + A Tensor with the shape as (b, h, w, c) + + """ + input_shape = inputs.get_shape() + batch_size = tf.shape(inputs)[0] + kernel_n = int(int(offsets.get_shape()[3]) / 2) + input_h = input_shape[1] + input_w = input_shape[2] + channel = input_shape[3] + + # inputs (b, h, w, c) --> (b*c, h, w) + inputs = _to_bc_h_w(inputs, input_shape) + + # offsets (b, h, w, 2*n) --> (b, h, w, n, 2) + offsets = tf.reshape(offsets, (batch_size, input_h, input_w, kernel_n, 2)) + # offsets (b, h, w, n, 2) --> (b*c, h, w, n, 2) + # offsets = tf.tile(offsets, [channel, 1, 1, 1, 1]) + + coords = tf.expand_dims(grid_offset, 0) # grid_offset --> (1, h, w, n, 2) + coords = tf.tile(coords, [batch_size, 1, 1, 1, 1]) + offsets # grid_offset --> (b, h, w, n, 2) + + # clip out of bound + coords = tf.stack( + [ + tf.clip_by_value(coords[:, :, :, :, 0], 0.0, tf.cast(input_h - 1, 'float32')), + tf.clip_by_value(coords[:, :, :, :, 1], 0.0, tf.cast(input_w - 1, 'float32')) + ], + axis=-1) + coords = tf.tile(coords, [channel, 1, 1, 1, 1]) + + mapped_vals = _tf_batch_map_coordinates(inputs, coords) + # (b*c, h, w, n) --> (b, h, w, n, c) + mapped_vals = _to_b_h_w_n_c(mapped_vals, [batch_size, input_h, input_w, kernel_n, channel]) + + return mapped_vals + + Layer.__init__(self, name=name) + self.inputs = layer.outputs + self.offset_layer = offset_layer + if act is None: + act = tf.identity + logging.info("DeformableConv2d %s: n_filter: %d, filter_size: %s act:%s" % (self.name, n_filter, str(filter_size), act.__name__)) + + try: + pre_channel = int(layer.outputs.get_shape()[-1]) + except: # if pre_channel is ?, it happens when using Spatial Transformer Net + pre_channel = 1 + logging.info("[warnings] unknow input channels, set to 1") + shape = (filter_size[0], filter_size[1], pre_channel, n_filter) + + with tf.variable_scope(name) as vs: + offset = self.offset_layer.outputs + assert offset.get_shape()[-1] == 2 * shape[0] * shape[1] + + # Grid initialisation + input_h = int(self.inputs.get_shape()[1]) + input_w = int(self.inputs.get_shape()[2]) + kernel_n = shape[0] * shape[1] + initial_offsets = tf.stack(tf.meshgrid(tf.range(shape[0]), tf.range(shape[1]), indexing='ij')) # initial_offsets --> (kh, kw, 2) + initial_offsets = tf.reshape(initial_offsets, (-1, 2)) # initial_offsets --> (n, 2) + initial_offsets = tf.expand_dims(initial_offsets, 0) # initial_offsets --> (1, n, 2) + initial_offsets = tf.expand_dims(initial_offsets, 0) # initial_offsets --> (1, 1, n, 2) + initial_offsets = tf.tile(initial_offsets, [input_h, input_w, 1, 1]) # initial_offsets --> (h, w, n, 2) + initial_offsets = tf.cast(initial_offsets, 'float32') + grid = tf.meshgrid( + tf.range(-int((shape[0] - 1) / 2.0), int(input_h - int((shape[0] - 1) / 2.0)), 1), + tf.range(-int((shape[1] - 1) / 2.0), int(input_w - int((shape[1] - 1) / 2.0)), 1), + indexing='ij') + + grid = tf.stack(grid, axis=-1) + grid = tf.cast(grid, 'float32') # grid --> (h, w, 2) + grid = tf.expand_dims(grid, 2) # grid --> (h, w, 1, 2) + grid = tf.tile(grid, [1, 1, kernel_n, 1]) # grid --> (h, w, n, 2) + grid_offset = grid + 
initial_offsets # grid_offset --> (h, w, n, 2) + + input_deform = _tf_batch_map_offsets(self.inputs, offset, grid_offset) + + W = tf.get_variable( + name='W_deformableconv2d', shape=[1, 1, shape[0] * shape[1], shape[-2], shape[-1]], initializer=W_init, dtype=D_TYPE, **W_init_args) + + if b_init: + b = tf.get_variable(name='b_deformableconv2d', shape=(shape[-1]), initializer=b_init, dtype=D_TYPE, **b_init_args) + self.outputs = tf.reshape( + act(tf.nn.conv3d(input_deform, W, strides=[1, 1, 1, 1, 1], padding='VALID', name=None) + b), + (tf.shape(self.inputs)[0], input_h, input_w, shape[-1])) + else: + self.outputs = tf.reshape( + act(tf.nn.conv3d(input_deform, W, strides=[1, 1, 1, 1, 1], padding='VALID', name=None)), + (tf.shape(self.inputs)[0], input_h, input_w, shape[-1])) + + # fixed + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + + # offset_layer + offset_params = [osparam for osparam in offset_layer.all_params if osparam not in layer.all_params] + offset_layers = [oslayer for oslayer in offset_layer.all_layers if oslayer not in layer.all_layers] + + self.all_params.extend(offset_params) + self.all_layers.extend(offset_layers) + self.all_drop.update(offset_layer.all_drop) + + # this layer + self.all_layers.extend([self.outputs]) + if b_init: + self.all_params.extend([W, b]) + else: + self.all_params.extend([W]) + + +def atrous_conv1d( + layer, + n_filter=32, + filter_size=2, + stride=1, + dilation=1, + act=tf.identity, + padding='SAME', + data_format='NWC', + W_init=tf.truncated_normal_initializer(stddev=0.02), + b_init=tf.constant_initializer(value=0.0), + W_init_args={}, + b_init_args={}, + name='conv1d', +): + """Simplified version of :class:`AtrousConv1dLayer`. + + Parameters + ---------- + layer : :class:`Layer` + Previous layer. + n_filter : int + The number of filters. + filter_size : int + The filter size. + stride : tuple of int + The strides: (height, width). + dilation : int + The filter dilation size. + act : activation function + The activation function of this layer. + padding : str + The padding algorithm type: "SAME" or "VALID". + data_format : str + Default is 'NWC' as it is a 1D CNN. + W_init : initializer + The initializer for the weight matrix. + b_init : initializer or None + The initializer for the bias vector. If None, skip biases. + W_init_args : dictionary + The arguments for the weight matrix initializer. + b_init_args : dictionary + The arguments for the bias vector initializer. + name : str + A unique layer name. + + Returns + ------- + :class:`Layer` + A :class:`AtrousConv1dLayer` object + + """ + return Conv1dLayer( + layer=layer, + act=act, + shape=(filter_size, int(layer.outputs.get_shape()[-1]), n_filter), + stride=stride, + padding=padding, + dilation_rate=dilation, + data_format=data_format, + W_init=W_init, + b_init=b_init, + W_init_args=W_init_args, + b_init_args=b_init_args, + name=name, + ) + + +class AtrousConv2dLayer(Layer): + """The :class:`AtrousConv2dLayer` class is 2D atrous convolution (a.k.a. convolution with holes or dilated + convolution) 2D layer, see `tf.nn.atrous_conv2d `__. + + Parameters + ---------- + layer : :class:`Layer` + Previous layer with a 4D output tensor in the shape of (batch, height, width, channels). + n_filter : int + The number of filters. + filter_size : tuple of int + The filter size: (height, width). + rate : int + The stride that we sample input values in the height and width dimensions. 
+ This equals the rate that we up-sample the filters by inserting zeros across the height and width dimensions. + In the literature, this parameter is sometimes mentioned as input stride or dilation. + act : activation function + The activation function of this layer. + padding : str + The padding algorithm type: "SAME" or "VALID". + W_init : initializer + The initializer for the weight matrix. + b_init : initializer or None + The initializer for the bias vector. If None, skip biases. + W_init_args : dictionary + The arguments for the weight matrix initializer. + b_init_args : dictionary + The arguments for the bias vector initializer. + name : str + A unique layer name. + + """ + + def __init__(self, + layer, + n_filter=32, + filter_size=(3, 3), + rate=2, + act=tf.identity, + padding='SAME', + W_init=tf.truncated_normal_initializer(stddev=0.02), + b_init=tf.constant_initializer(value=0.0), + W_init_args={}, + b_init_args={}, + name='atrou2d'): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + if act is None: + act = tf.identity + logging.info("AtrousConv2dLayer %s: n_filter:%d filter_size:%s rate:%d pad:%s act:%s" % (self.name, n_filter, filter_size, rate, padding, act.__name__)) + with tf.variable_scope(name) as vs: + shape = [filter_size[0], filter_size[1], int(self.inputs.get_shape()[-1]), n_filter] + filters = tf.get_variable(name='filter', shape=shape, initializer=W_init, dtype=D_TYPE, **W_init_args) + if b_init: + b = tf.get_variable(name='b', shape=(n_filter), initializer=b_init, dtype=D_TYPE, **b_init_args) + self.outputs = act(tf.nn.atrous_conv2d(self.inputs, filters, rate, padding) + b) + else: + self.outputs = act(tf.nn.atrous_conv2d(self.inputs, filters, rate, padding)) + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) + if b_init: + self.all_params.extend([filters, b]) + else: + self.all_params.extend([filters]) + + +class _SeparableConv2dLayer(Layer): # TODO + """The :class:`SeparableConv2dLayer` class is 2D convolution with separable filters, see `tf.layers.separable_conv2d `__. + + This layer has not been fully tested yet. + + Parameters + ---------- + layer : :class:`Layer` + Previous layer with a 4D output tensor in the shape of [batch, height, width, channels]. + n_filter : int + The number of filters. + filter_size : tuple of int + The filter size (height, width). + strides : tuple of int + The strides (height, width). + This can be a single integer if you want to specify the same value for all spatial dimensions. + Specifying any stride value != 1 is incompatible with specifying any dilation_rate value != 1. + padding : str + The type of padding algorithm: "SAME" or "VALID" + data_format : str + One of channels_last (Default) or channels_first. + The order must match the input dimensions. + channels_last corresponds to inputs with shapedata_format = 'NWHC' (batch, width, height, channels) while + channels_first corresponds to inputs with shape [batch, channels, width, height]. + dilation_rate : int or tuple of ints + The dilation rate of the convolution. + It can be a single integer if you want to specify the same value for all spatial dimensions. + Currently, specifying any dilation_rate value != 1 is incompatible with specifying any stride value != 1. + depth_multiplier : int + The number of depthwise convolution output channels for each input channel. 
+        The total number of depthwise convolution output channels will be equal to num_filters_in * depth_multiplier.
+    act : activation function
+        The activation function of this layer.
+    use_bias : boolean
+        Whether the layer uses a bias.
+    depthwise_initializer : initializer
+        The initializer for the depthwise convolution kernel.
+    pointwise_initializer : initializer
+        The initializer for the pointwise convolution kernel.
+    bias_initializer : initializer
+        The initializer for the bias vector. If None, skip bias.
+    depthwise_regularizer : regularizer
+        Optional regularizer for the depthwise convolution kernel.
+    pointwise_regularizer : regularizer
+        Optional regularizer for the pointwise convolution kernel.
+    bias_regularizer : regularizer
+        Optional regularizer for the bias vector.
+    activity_regularizer : regularizer
+        Regularizer function for the output.
+    name : str
+        A unique layer name.
+
+    """
+
+    def __init__(self,
+                 layer,
+                 n_filter,
+                 filter_size=5,
+                 strides=(1, 1),
+                 padding='valid',
+                 data_format='channels_last',
+                 dilation_rate=(1, 1),
+                 depth_multiplier=1,
+                 act=tf.identity,
+                 use_bias=True,
+                 depthwise_initializer=None,
+                 pointwise_initializer=None,
+                 bias_initializer=tf.zeros_initializer,
+                 depthwise_regularizer=None,
+                 pointwise_regularizer=None,
+                 bias_regularizer=None,
+                 activity_regularizer=None,
+                 name='separable_conv2d'):
+        Layer.__init__(self, name=name)
+        self.inputs = layer.outputs
+        assert tf.__version__ > "0.12.1", "This layer only supports TF 1.0+"
+
+        bias_initializer = bias_initializer()
+
+        logging.info("SeparableConv2dLayer %s: n_filter:%d filter_size:%s strides:%s padding:%s dilation_rate:%s depth_multiplier:%s act:%s" %
+                     (self.name, n_filter, filter_size, str(strides), padding, str(dilation_rate), str(depth_multiplier), act.__name__))
+
+        with tf.variable_scope(name) as vs:
+            self.outputs = tf.layers.separable_conv2d(
+                self.inputs,
+                filters=n_filter,
+                kernel_size=filter_size,
+                strides=strides,
+                padding=padding,
+                data_format=data_format,
+                dilation_rate=dilation_rate,
+                depth_multiplier=depth_multiplier,
+                activation=act,
+                use_bias=use_bias,
+                depthwise_initializer=depthwise_initializer,
+                pointwise_initializer=pointwise_initializer,
+                bias_initializer=bias_initializer,
+                depthwise_regularizer=depthwise_regularizer,
+                pointwise_regularizer=pointwise_regularizer,
+                bias_regularizer=bias_regularizer,
+                activity_regularizer=activity_regularizer,
+            )
+            # trainable=True, name=None, reuse=None)
+
+            variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
+
+        self.all_layers = list(layer.all_layers)
+        self.all_params = list(layer.all_params)
+        self.all_drop = dict(layer.all_drop)
+        self.all_layers.extend([self.outputs])
+        self.all_params.extend(variables)
+
+
+def deconv2d_bilinear_upsampling_initializer(shape):
+    """Returns the initializer that can be passed to DeConv2dLayer for initializing the
+    weights in correspondence to channel-wise bilinear up-sampling.
+    Used in segmentation approaches such as [FCN](https://arxiv.org/abs/1605.06211)
+
+    Parameters
+    ----------
+    shape : tuple of int
+        The shape of the filters, [height, width, output_channels, in_channels].
+        It must match the shape passed to DeConv2dLayer.
+
+    Returns
+    -------
+    ``tf.constant_initializer``
+        A constant initializer with weights set to correspond to per channel bilinear upsampling
+        when passed as W_init in DeConv2dLayer
+
+    Examples
+    --------
+    - Upsampling by a factor of 2, e.g. 100 -> 200
+    >>> rescale_factor = 2
+    >>> imsize = 100
+    >>> filter_size = (2 * rescale_factor - rescale_factor % 2)  # corresponding bilinear filter size
+    >>> num_in_channels = 3
+    >>> num_out_channels = 3
+    >>> deconv_filter_shape = (filter_size, filter_size, num_out_channels, num_in_channels)
+    >>> x = tf.placeholder(tf.float32, (1, imsize, imsize, num_in_channels))
+    >>> net = tl.layers.InputLayer(x, name='input_layer')
+    >>> bilinear_init = deconv2d_bilinear_upsampling_initializer(shape=deconv_filter_shape)
+    >>> net = tl.layers.DeConv2dLayer(net,
+    ...                               shape=deconv_filter_shape,
+    ...                               output_shape=(1, imsize*rescale_factor, imsize*rescale_factor, num_out_channels),
+    ...                               strides=(1, rescale_factor, rescale_factor, 1),
+    ...                               W_init=bilinear_init,
+    ...                               padding='SAME',
+    ...                               act=tf.identity, name='g/h1/decon2d')
+
+    """
+    if shape[0] != shape[1]:
+        raise Exception('deconv2d_bilinear_upsampling_initializer only supports symmetrical filter sizes')
+    if shape[3] < shape[2]:
+        raise Exception('deconv2d_bilinear_upsampling_initializer behaviour is not defined for num_in_channels < num_out_channels ')
+
+    filter_size = shape[0]
+    num_out_channels = shape[2]
+    num_in_channels = shape[3]
+
+    # Create bilinear filter kernel as numpy array
+    bilinear_kernel = np.zeros([filter_size, filter_size], dtype=np.float32)
+    scale_factor = (filter_size + 1) // 2
+    if filter_size % 2 == 1:
+        center = scale_factor - 1
+    else:
+        center = scale_factor - 0.5
+    for x in range(filter_size):
+        for y in range(filter_size):
+            bilinear_kernel[x, y] = (1 - abs(x - center) / scale_factor) * \
+                                    (1 - abs(y - center) / scale_factor)
+    weights = np.zeros((filter_size, filter_size, num_out_channels, num_in_channels))
+    for i in range(num_out_channels):
+        weights[:, :, i, i] = bilinear_kernel
+
+    # assign numpy array to constant_initializer and pass to get_variable
+    bilinear_weights_init = tf.constant_initializer(value=weights, dtype=D_TYPE)  # dtype=tf.float32)
+    return bilinear_weights_init
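# A standalone NumPy check of the bilinear kernel constructed by
# `deconv2d_bilinear_upsampling_initializer` above. For x2 up-sampling the
# docstring picks filter_size = 2*2 - 2 % 2 = 4, so the kernel center is 1.5.
import numpy as np

filter_size = 4
scale_factor = (filter_size + 1) // 2  # 2
center = scale_factor - 0.5            # even filter size
kernel = np.zeros((filter_size, filter_size), dtype=np.float32)
for x in range(filter_size):
    for y in range(filter_size):
        kernel[x, y] = (1 - abs(x - center) / scale_factor) * (1 - abs(y - center) / scale_factor)
# The kernel is the outer product of [0.25, 0.75, 0.75, 0.25] with itself,
# i.e. a separable tent (linear interpolation) filter.
assert np.allclose(kernel[0], [0.0625, 0.1875, 0.1875, 0.0625])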
+
+
+def conv1d(
+        layer,
+        n_filter=32,
+        filter_size=5,
+        stride=1,
+        dilation_rate=1,
+        act=tf.identity,
+        padding='SAME',
+        data_format="NWC",
+        W_init=tf.truncated_normal_initializer(stddev=0.02),
+        b_init=tf.constant_initializer(value=0.0),
+        W_init_args={},
+        b_init_args={},
+        name='conv1d',
+):
+    """Simplified version of :class:`Conv1dLayer`.
+
+    Parameters
+    ----------
+    layer : :class:`Layer`
+        Previous layer.
+    n_filter : int
+        The number of filters.
+    filter_size : int
+        The filter size.
+    stride : int
+        The stride step.
+    dilation_rate : int
+        The dilation rate to use for dilated convolution.
+    act : activation function
+        The function that is applied to the layer activations.
+    padding : str
+        The padding algorithm type: "SAME" or "VALID".
+    data_format : str
+        Default is 'NWC' as it is a 1D CNN.
+    W_init : initializer
+        The initializer for the weight matrix.
+    b_init : initializer or None
+        The initializer for the bias vector. If None, skip biases.
+    W_init_args : dictionary
+        The arguments for the weight matrix initializer.
+    b_init_args : dictionary
+        The arguments for the bias vector initializer.
+    name : str
+        A unique layer name.
+
+    Returns
+    -------
+    :class:`Layer`
+        A :class:`Conv1dLayer` object.
+
+    Examples
+    ---------
+    >>> x = tf.placeholder(tf.float32, (batch_size, width))
+    >>> y_ = tf.placeholder(tf.int64, shape=(batch_size,))
+    >>> n = InputLayer(x, name='in')
+    >>> n = ReshapeLayer(n, (-1, width, 1), name='rs')
+    >>> n = Conv1d(n, 64, 3, 1, act=tf.nn.relu, name='c1')
+    >>> n = MaxPool1d(n, 2, 2, padding='valid', name='m1')
+    >>> n = Conv1d(n, 128, 3, 1, act=tf.nn.relu, name='c2')
+    >>> n = MaxPool1d(n, 2, 2, padding='valid', name='m2')
+    >>> n = Conv1d(n, 128, 3, 1, act=tf.nn.relu, name='c3')
+    >>> n = MaxPool1d(n, 2, 2, padding='valid', name='m3')
+    >>> n = FlattenLayer(n, name='f')
+    >>> n = DenseLayer(n, 500, tf.nn.relu, name='d1')
+    >>> n = DenseLayer(n, 100, tf.nn.relu, name='d2')
+    >>> n = DenseLayer(n, 2, tf.identity, name='o')
+
+    """
+    return Conv1dLayer(
+        layer=layer,
+        act=act,
+        shape=(filter_size, int(layer.outputs.get_shape()[-1]), n_filter),
+        stride=stride,
+        dilation_rate=dilation_rate,
+        padding=padding,
+        data_format=data_format,
+        W_init=W_init,
+        b_init=b_init,
+        W_init_args=W_init_args,
+        b_init_args=b_init_args,
+        name=name,
+    )
+
+
+def conv2d(
+        layer,
+        n_filter=32,
+        filter_size=(3, 3),
+        strides=(1, 1),
+        act=tf.identity,
+        padding='SAME',
+        W_init=tf.truncated_normal_initializer(stddev=0.02),
+        b_init=tf.constant_initializer(value=0.0),
+        W_init_args={},
+        b_init_args={},
+        use_cudnn_on_gpu=None,
+        data_format=None,
+        name='conv2d',
+):
+    """Simplified version of :class:`Conv2dLayer`.
+
+    Parameters
+    ----------
+    layer : :class:`Layer`
+        Previous layer.
+    n_filter : int
+        The number of filters.
+    filter_size : tuple of int
+        The filter size (height, width).
+    strides : tuple of int
+        The sliding window strides of corresponding input dimensions.
+        It must be in the same order as the ``shape`` parameter.
+    act : activation function
+        The activation function of this layer.
+    padding : str
+        The padding algorithm type: "SAME" or "VALID".
+    W_init : initializer
+        The initializer for the weight matrix.
+    b_init : initializer or None
+        The initializer for the bias vector. If None, skip biases.
+    W_init_args : dictionary
+        The arguments for the weight matrix initializer.
+    b_init_args : dictionary
+        The arguments for the bias vector initializer.
+    use_cudnn_on_gpu : bool
+        Default is None.
+    data_format : str
+        "NHWC" or "NCHW", default is "NHWC".
+    name : str
+        A unique layer name.
+
+    Returns
+    -------
+    :class:`Layer`
+        A :class:`Conv2dLayer` object.
+
+    Examples
+    --------
+    >>> net = InputLayer(x, name='inputs')
+    >>> net = Conv2d(net, 64, (3, 3), act=tf.nn.relu, name='conv1_1')
+    >>> net = Conv2d(net, 64, (3, 3), act=tf.nn.relu, name='conv1_2')
+    >>> net = MaxPool2d(net, (2, 2), name='pool1')
+    >>> net = Conv2d(net, 128, (3, 3), act=tf.nn.relu, name='conv2_1')
+    >>> net = Conv2d(net, 128, (3, 3), act=tf.nn.relu, name='conv2_2')
+    >>> net = MaxPool2d(net, (2, 2), name='pool2')
+
+    """
+    assert len(strides) == 2, "len(strides) should be 2, Conv2d and Conv2dLayer are different."
+    try:
+        pre_channel = int(layer.outputs.get_shape()[-1])
+    except:  # if pre_channel is ?, it happens when using Spatial Transformer Net
+        pre_channel = 1
+        logging.info("[warnings] unknown input channels, set to 1")
+    return Conv2dLayer(
+        layer,
+        act=act,
+        shape=(filter_size[0], filter_size[1], pre_channel, n_filter),  # 32 features for each 5x5 patch
+        strides=(1, strides[0], strides[1], 1),
+        padding=padding,
+        W_init=W_init,
+        W_init_args=W_init_args,
+        b_init=b_init,
+        b_init_args=b_init_args,
+        use_cudnn_on_gpu=use_cudnn_on_gpu,
+        data_format=data_format,
+        name=name)
+
+
+def deconv2d(layer,
+             n_filter=32,
+             filter_size=(3, 3),
+             out_size=(30, 30),
+             strides=(2, 2),
+             padding='SAME',
+             batch_size=None,
+             act=tf.identity,
+             W_init=tf.truncated_normal_initializer(stddev=0.02),
+             b_init=tf.constant_initializer(value=0.0),
+             W_init_args={},
+             b_init_args={},
+             name='decnn2d'):
+    """Simplified version of :class:`DeConv2dLayer`.
+
+    Parameters
+    ----------
+    layer : :class:`Layer`
+        Previous layer.
+    n_filter : int
+        The number of filters.
+    filter_size : tuple of int
+        The filter size (height, width).
+    out_size : tuple of int
+        Required if TF version < 1.3: (height, width) of the output.
+    strides : tuple of int
+        The stride step (height, width).
+    padding : str
+        The padding algorithm type: "SAME" or "VALID".
+    batch_size : int
+        Required if TF version < 1.3: int or None.
+        If None, try to find the `batch_size` from the first dim of net.outputs (you should define the `batch_size` in the input placeholder).
+    act : activation function
+        The activation function of this layer.
+    W_init : initializer
+        The initializer for the weight matrix.
+    b_init : initializer or None
+        The initializer for the bias vector. If None, skip biases.
+    W_init_args : dictionary
+        The arguments for the weight matrix initializer.
+    b_init_args : dictionary
+        The arguments for the bias vector initializer.
+    name : str
+        A unique layer name.
+
+    Returns
+    -------
+    :class:`Layer`
+        A :class:`DeConv2dLayer` object.
+
+    """
+    if act is None:
+        act = tf.identity
+    assert len(strides) == 2, "len(strides) should be 2, DeConv2d and DeConv2dLayer are different."
+    if tf.__version__ > '1.3':
+        logging.info("DeConv2d %s: n_filters:%s strides:%s pad:%s act:%s" % (name, str(n_filter), str(strides), padding, act.__name__))
+        inputs = layer.outputs
+        scope_name = tf.get_variable_scope().name
+        if scope_name:
+            whole_name = scope_name + '/' + name
+        else:
+            whole_name = name
+        net_new = Layer(inputs, name=whole_name)
+        # with tf.name_scope(name):
+        with tf.variable_scope(name) as vs:
+            net_new.outputs = tf.contrib.layers.conv2d_transpose(
+                inputs=inputs,
+                num_outputs=n_filter,
+                kernel_size=filter_size,
+                stride=strides,
+                padding=padding,
+                activation_fn=act,
+                weights_initializer=W_init,
+                biases_initializer=b_init,
+                scope=name)
+            new_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
+        net_new.all_layers = list(layer.all_layers)
+        net_new.all_params = list(layer.all_params)
+        net_new.all_drop = dict(layer.all_drop)
+        net_new.all_layers.extend([net_new.outputs])
+        net_new.all_params.extend(new_variables)
+        return net_new
+    else:
+        if batch_size is None:
+            # batch_size = tf.shape(net.outputs)[0]
+            fixed_batch_size = layer.outputs.get_shape().with_rank_at_least(1)[0]
+            if fixed_batch_size.value:
+                batch_size = fixed_batch_size.value
+            else:
+                from tensorflow.python.ops import array_ops
+                batch_size = array_ops.shape(layer.outputs)[0]
+        return DeConv2dLayer(
+            layer=layer,
+            act=act,
+            shape=(filter_size[0], filter_size[1], n_filter, int(layer.outputs.get_shape()[-1])),
+            output_shape=(batch_size, int(out_size[0]), int(out_size[1]), n_filter),
+            strides=(1, strides[0], strides[1], 1),
+            padding=padding,
+            W_init=W_init,
+            b_init=b_init,
+            W_init_args=W_init_args,
+            b_init_args=b_init_args,
+            name=name)
+
+
+class DeConv3d(Layer):
+    """Simplified version of the :class:`DeConv3dLayer`, see `tf.contrib.layers.conv3d_transpose `__.
+
+    Parameters
+    ----------
+    layer : :class:`Layer`
+        Previous layer.
+    n_filter : int
+        The number of filters.
+    filter_size : tuple of int
+        The filter size (depth, height, width).
+    strides : tuple of int
+        The stride step (depth, height, width).
+    padding : str
+        The padding algorithm type: "SAME" or "VALID".
+    act : activation function
+        The activation function of this layer.
+    W_init : initializer
+        The initializer for the weight matrix.
+    b_init : initializer or None
+        The initializer for the bias vector. If None, skip bias.
+    name : str
+        A unique layer name.
+
+    """
+
+    def __init__(self,
+                 layer,
+                 n_filter=32,
+                 filter_size=(3, 3, 3),
+                 strides=(2, 2, 2),
+                 padding='SAME',
+                 act=tf.identity,
+                 W_init=tf.truncated_normal_initializer(stddev=0.02),
+                 b_init=tf.constant_initializer(value=0.0),
+                 name='decnn3d'):
+        Layer.__init__(self, name=name)
+        self.inputs = layer.outputs
+        logging.info("DeConv3d %s: n_filters:%s strides:%s pad:%s act:%s" % (name, str(n_filter), str(strides), padding, act.__name__))
+
+        with tf.variable_scope(name) as vs:
+            self.outputs = tf.contrib.layers.conv3d_transpose(
+                inputs=self.inputs,
+                num_outputs=n_filter,
+                kernel_size=filter_size,
+                stride=strides,
+                padding=padding,
+                activation_fn=act,
+                weights_initializer=W_init,
+                biases_initializer=b_init,
+                scope=name,
+            )
+            new_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
+
+        self.all_layers = list(layer.all_layers)
+        self.all_params = list(layer.all_params)
+        self.all_drop = dict(layer.all_drop)
+        self.all_layers.extend([self.outputs])
+        self.all_params.extend(new_variables)
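# A minimal sketch of `DeConv2d` (the `deconv2d` helper above) performing x2
# spatial up-sampling, as used in SRGAN-style generators. It assumes
# TF >= 1.3, so `out_size` and `batch_size` are not needed; all shapes and
# names are illustrative.
import tensorflow as tf
import tensorlayer as tl

t = tf.placeholder(tf.float32, (None, 64, 64, 128))
n = tl.layers.InputLayer(t, name='in_up')
n = tl.layers.DeConv2d(n, n_filter=64, filter_size=(3, 3), strides=(2, 2),
                       act=tf.nn.relu, name='up1')
print(n.outputs.get_shape())  # (?, 128, 128, 64): height and width doubled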
+class DepthwiseConv2d(Layer):
+    """Separable/Depthwise Convolutional 2D layer, see `tf.nn.depthwise_conv2d `__.
+
+    Input:
+        4-D Tensor (batch, height, width, in_channels).
+    Output:
+        4-D Tensor (batch, new height, new width, in_channels * channel_multiplier).
+
+    Parameters
+    ------------
+    layer : :class:`Layer`
+        Previous layer.
+    channel_multiplier : int
+        The number of channels to expand to.
+    shape : tuple of int
+        The filter size (height, width).
+    strides : tuple of int
+        The stride step (height, width).
+    act : activation function
+        The activation function of this layer.
+    padding : str
+        The padding algorithm type: "SAME" or "VALID".
+    W_init : initializer
+        The initializer for the weight matrix.
+    b_init : initializer or None
+        The initializer for the bias vector. If None, skip bias.
+    W_init_args : dictionary
+        The arguments for the weight matrix initializer.
+    b_init_args : dictionary
+        The arguments for the bias vector initializer.
+    name : str
+        A unique layer name.
+
+    Examples
+    ---------
+    >>> t_im = tf.placeholder("float32", (None, 256, 256, 3))
+    >>> net = InputLayer(t_im, name='in')
+    >>> net = DepthwiseConv2d(net, 32, (3, 3), (1, 1, 1, 1), tf.nn.relu, padding="SAME", name='dep')
+    >>> print(net.outputs.get_shape())
+    ... (?, 256, 256, 96)
+
+    References
+    -----------
+    - tflearn's `grouped_conv_2d `__
+    - keras's `separableconv2d `__
+
+    """
+
+    def __init__(
+            self,
+            layer,
+            # n_filter = 32,
+            channel_multiplier=3,
+            shape=(3, 3),
+            strides=(1, 1),
+            act=tf.identity,
+            padding='SAME',
+            W_init=tf.truncated_normal_initializer(stddev=0.02),
+            b_init=tf.constant_initializer(value=0.0),
+            W_init_args={},
+            b_init_args={},
+            name='depthwise_conv2d',
+    ):
+        Layer.__init__(self, name=name)
+        self.inputs = layer.outputs
+
+        if act is None:
+            act = tf.identity
+
+        logging.info("DepthwiseConv2d %s: shape:%s strides:%s pad:%s act:%s" % (self.name, str(shape), str(strides), padding, act.__name__))
+        try:
+            pre_channel = int(layer.outputs.get_shape()[-1])
+        except:  # if pre_channel is ?, it happens when using Spatial Transformer Net
+            pre_channel = 1
+            logging.info("[warnings] unknown input channels, set to 1")
+
+        shape = [shape[0], shape[1], pre_channel, channel_multiplier]
+
+        if len(strides) == 2:
+            strides = [1, strides[0], strides[1], 1]
+
+        assert len(strides) == 4, "len(strides) should be 4."
+
+        with tf.variable_scope(name) as vs:
+            W = tf.get_variable(
+                name='W_sepconv2d', shape=shape, initializer=W_init, dtype=D_TYPE,
+                **W_init_args)  # [filter_height, filter_width, in_channels, channel_multiplier]
+            if b_init:
+                b = tf.get_variable(name='b_sepconv2d', shape=(pre_channel * channel_multiplier), initializer=b_init, dtype=D_TYPE, **b_init_args)
+                self.outputs = act(tf.nn.depthwise_conv2d(self.inputs, W, strides=strides, padding=padding) + b)
+            else:
+                self.outputs = act(tf.nn.depthwise_conv2d(self.inputs, W, strides=strides, padding=padding))
+
+        self.all_layers = list(layer.all_layers)
+        self.all_params = list(layer.all_params)
+        self.all_drop = dict(layer.all_drop)
+        self.all_layers.extend([self.outputs])
+        if b_init:
+            self.all_params.extend([W, b])
+        else:
+            self.all_params.extend([W])
+
+
+# Alias
+AtrousConv1dLayer = atrous_conv1d
+Conv1d = conv1d
+Conv2d = conv2d
+DeConv2d = deconv2d
diff --git a/tensorlayer/layers/core.py b/tensorlayer/layers/core.py
new file mode 100644
index 00000000..213335df
--- /dev/null
+++ b/tensorlayer/layers/core.py
@@ -0,0 +1,1312 @@
+# -*- coding: utf-8 -*-
+
+import time
+
+import numpy as np
+import tensorflow as tf
+
+from .. import _logging as logging
+from .. import cost, files, iterate, utils, visualize
+
+# __all__ = [
+#     "Layer",
+#     "DenseLayer",
+# ]
+
+# set_keep = locals()
+set_keep = globals()
+set_keep['_layers_name_list'] = []
+set_keep['name_reuse'] = False
+
+D_TYPE = tf.float32
+
+try:  # For TF12 and later
+    TF_GRAPHKEYS_VARIABLES = tf.GraphKeys.GLOBAL_VARIABLES
+except:  # For TF11 and before
+    TF_GRAPHKEYS_VARIABLES = tf.GraphKeys.VARIABLES
+
+
+def flatten_reshape(variable, name='flatten'):
+    """Reshapes a high-dimensional input into a 2D tensor:
+    [batch_size, mask_row, mask_col, n_mask] ---> [batch_size, mask_row * mask_col * n_mask]
+
+    Parameters
+    ----------
+    variable : TensorFlow variable or tensor
+        The variable or tensor to be flattened.
+    name : str
+        A unique layer name.
+
+    Returns
+    -------
+    Tensor
+        Flattened tensor
+
+    Examples
+    --------
+    >>> W_conv2 = weight_variable([5, 5, 100, 32])   # 32 features for each 5x5 patch
+    >>> b_conv2 = bias_variable([32])
+    >>> W_fc1 = weight_variable([7 * 7 * 32, 256])
+
+    >>> h_conv2 = tf.nn.relu(conv2d(h_pool1, W_conv2) + b_conv2)
+    >>> h_pool2 = max_pool_2x2(h_conv2)
+    >>> h_pool2.get_shape()[:].as_list() = [batch_size, 7, 7, 32]
+    ...     [batch_size, mask_row, mask_col, n_mask]
+    >>> h_pool2_flat = tl.layers.flatten_reshape(h_pool2)
+    ...     [batch_size, mask_row * mask_col * n_mask]
+    >>> h_pool2_flat_drop = tf.nn.dropout(h_pool2_flat, keep_prob)
+    ...
+
+    """
+    dim = 1
+    for d in variable.get_shape()[1:].as_list():
+        dim *= d
+    return tf.reshape(variable, shape=[-1, dim], name=name)
+
+
+def clear_layers_name():
+    """Clear all layer names in `set_keep['_layers_name_list']` if layer names are reused.
+
+    Examples
+    ---------
+    Clean the current graph and try to re-define model.
+
+    >>> for .... (different model settings):
+    >>>     with tf.Graph().as_default() as graph:  # clear all variables of TF
+    >>>         tl.layers.clear_layers_name()        # clear all layer name of TL
+    >>>         sess = tf.InteractiveSession()
+    >>>         # define and train a model here
+    >>>         sess.close()
+
+    Enable reusing layer names.
+
+    >>> net = tl.layers.InputLayer(x, name='input_layer')
+    >>> net = tl.layers.DenseLayer(net, n_units=800, name='relu1')
+    ...
+    >>> tl.layers.clear_layers_name()
+    >>> net2 = tl.layers.InputLayer(x, name='input_layer')
+    >>> net2 = tl.layers.DenseLayer(net2, n_units=800, name='relu1')
+
+    """
+    set_keep['_layers_name_list'] = []
+
+
+def set_name_reuse(enable=True):
+    """Enable or disable reuse of layer names.
+
+    By default, each layer must have a unique name. When you want two or more
+    input placeholders (inference) to share the same model parameters, you need
+    to enable layer name reuse, which allows the parameters to share the same
+    name scope.
+
+    Parameters
+    ----------
+    enable : boolean
+        Enable or disable name/layer reuse, None means False.
+
+    Examples
+    --------
+    >>> def embed_seq(input_seqs, is_train, reuse):
+    >>>     with tf.variable_scope("model", reuse=reuse):
+    >>>         tl.layers.set_name_reuse(reuse)
+    >>>         net = tl.layers.EmbeddingInputlayer(
+    ...             inputs = input_seqs,
+    ...             vocabulary_size = vocab_size,
+    ...             embedding_size = embedding_size,
+    ...             name = 'e_embedding')
+    >>>         net = tl.layers.DynamicRNNLayer(net,
+    ...             cell_fn = tf.contrib.rnn.BasicLSTMCell,
+    ...             n_hidden = embedding_size,
+    ...             dropout = (0.7 if is_train else None),
+    ...             initializer = w_init,
+    ...             sequence_length = tl.layers.retrieve_seq_length_op2(input_seqs),
+    ...             return_last = True,
+    ...             name = 'e_dynamicrnn')
+    >>>     return net
+    >>>
+    >>> net_train = embed_seq(t_caption, is_train=True, reuse=False)
+    >>> net_test = embed_seq(t_caption, is_train=False, reuse=True)
+
+    - see ``tutorial_ptb_lstm.py`` for example.
+
+    """
+    set_keep['name_reuse'] = enable
+
+
+def initialize_rnn_state(state, feed_dict=None):
+    """Returns the initialized RNN state.
+    The inputs are an `LSTMStateTuple` or a `State` of `RNNCells`, and an optional `feed_dict`.
+
+    Parameters
+    ----------
+    state : RNN state.
+        The TensorFlow's RNN state.
+    feed_dict : dictionary
+        The optional feed dictionary used when evaluating the state tensors.
+
+    Returns
+    -------
+    RNN state
+        The TensorFlow's RNN state.
+
+    """
+    try:  # TF1.0
+        LSTMStateTuple = tf.contrib.rnn.LSTMStateTuple
+    except:
+        LSTMStateTuple = tf.nn.rnn_cell.LSTMStateTuple
+
+    if isinstance(state, LSTMStateTuple):
+        c = state.c.eval(feed_dict=feed_dict)
+        h = state.h.eval(feed_dict=feed_dict)
+        return (c, h)
+    else:
+        new_state = state.eval(feed_dict=feed_dict)
+        return new_state
+
+
+def print_all_variables(train_only=False):
+    """Print information about trainable or global variables,
+    without requiring ``tl.layers.initialize_global_variables(sess)``.
+
+    Parameters
+    ----------
+    train_only : boolean
+        Whether to print trainable variables only.
+        - If True, print the trainable variables.
+        - If False, print all variables.
+
+    """
+    # tvar = tf.trainable_variables() if train_only else tf.all_variables()
+    if train_only:
+        t_vars = tf.trainable_variables()
+        logging.info("  [*] printing trainable variables")
+    else:
+        try:  # TF1.0+
+            t_vars = tf.global_variables()
+        except:  # TF0.12
+            t_vars = tf.all_variables()
+        logging.info("  [*] printing global variables")
+    for idx, v in enumerate(t_vars):
+        logging.info("  var {:3}: {:15}   {}".format(idx, str(v.get_shape()), v.name))
+
+
+def get_variables_with_name(name=None, train_only=True, printable=False):
+    """Get a list of TensorFlow variables by a given name scope.
+
+    Parameters
+    ----------
+    name : str
+        Get the variables that contain this name.
+    train_only : boolean
+        If True, only get the trainable variables.
+    printable : boolean
+        If True, print the information of all variables.
+
+    Returns
+    -------
+    list of Tensor
+        A list of TensorFlow variables
+
+    Examples
+    --------
+    >>> dense_vars = tl.layers.get_variables_with_name('dense', True, True)
+
+    """
+    if name is None:
+        raise Exception("please input a name")
+    logging.info("  [*] getting variables with %s" % name)
+    # tvar = tf.trainable_variables() if train_only else tf.all_variables()
+    if train_only:
+        t_vars = tf.trainable_variables()
+    else:
+        try:  # TF1.0+
+            t_vars = tf.global_variables()
+        except:  # TF0.12
+            t_vars = tf.all_variables()
+
+    d_vars = [var for var in t_vars if name in var.name]
+    if printable:
+        for idx, v in enumerate(d_vars):
+            logging.info("  got {:3}: {:15}   {}".format(idx, v.name, str(v.get_shape())))
+    return d_vars
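# A short sketch of `get_variables_with_name` above: select variable subsets
# by scope substring, e.g. to give each GAN sub-network its own optimizer.
# The scope names and the commented-out losses are illustrative placeholders.
import tensorflow as tf
import tensorlayer as tl

g_vars = tl.layers.get_variables_with_name('generator', train_only=True, printable=False)
d_vars = tl.layers.get_variables_with_name('discriminator', train_only=True, printable=False)
# Each optimizer then only updates its own sub-network:
# g_optim = tf.train.AdamOptimizer(1e-4).minimize(g_loss, var_list=g_vars)
# d_optim = tf.train.AdamOptimizer(1e-4).minimize(d_loss, var_list=d_vars)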
+
+
+def get_layers_with_name(net, name="", printable=False):
+    """Get a list of layers' output in a network by a given name scope.
+
+    Parameters
+    -----------
+    net : :class:`Layer`
+        The last layer of the network.
+    name : str
+        Get the layers' output that contain this name.
+    printable : boolean
+        If True, print information of all the layers' output
+
+    Returns
+    --------
+    list of Tensor
+        A list of layers' output (TensorFlow tensor)
+
+    Examples
+    ---------
+    >>> layers = tl.layers.get_layers_with_name(net, "CNN", True)
+
+    """
+    logging.info("  [*] getting layers with %s" % name)
+
+    layers = []
+    i = 0
+    for layer in net.all_layers:
+        # logging.info(type(layer.name))
+        if name in layer.name:
+            layers.append(layer)
+            if printable:
+                logging.info("  got {:3}: {:15}   {}".format(i, layer.name, str(layer.get_shape())))
+                i = i + 1
+    return layers
+
+
+def list_remove_repeat(x):
+    """Remove repeated items from a list and return the processed list.
+    You may need it when creating merged layers, such as Concat and Elementwise.
+
+    Parameters
+    ----------
+    x : list
+        Input
+
+    Returns
+    -------
+    list
+        The list with its repeated items removed
+
+    Examples
+    -------
+    >>> l = [2, 3, 4, 2, 3]
+    >>> l = list_remove_repeat(l)
+    ... [2, 3, 4]
+
+    """
+    y = []
+    for i in x:
+        if i not in y:
+            y.append(i)
+    return y
+
+
+def merge_networks(layers=[]):
+    """Merge all parameters, layers and dropout probabilities to a :class:`Layer`.
+    The returned network is the first network in the list.
+
+    Parameters
+    ----------
+    layers : list of :class:`Layer`
+        Merge all parameters, layers and dropout probabilities to the first layer in the list.
+
+    Returns
+    --------
+    :class:`Layer`
+        The network after merging all parameters, layers and dropout probabilities to the first network in the list.
+
+    Examples
+    ---------
+    >>> n1 = ...
+    >>> n2 = ...
+    >>> n1 = tl.layers.merge_networks([n1, n2])
+
+    """
+    layer = layers[0]
+
+    all_params = []
+    all_layers = []
+    all_drop = {}
+    for l in layers:
+        all_params.extend(l.all_params)
+        all_layers.extend(l.all_layers)
+        all_drop.update(l.all_drop)
+
+    layer.all_params = list(all_params)
+    layer.all_layers = list(all_layers)
+    layer.all_drop = dict(all_drop)
+
+    layer.all_layers = list_remove_repeat(layer.all_layers)
+    layer.all_params = list_remove_repeat(layer.all_params)
+
+    return layer
+
+
+def initialize_global_variables(sess):
+    """Initialize the global variables of TensorFlow.
+
+    Run ``sess.run(tf.global_variables_initializer())`` for TF 0.12+ or
+    ``sess.run(tf.initialize_all_variables())`` for TF 0.11.
+
+    Parameters
+    ----------
+    sess : Session
+        TensorFlow session.
+
+    """
+    assert sess is not None
+    # try:    # TF12+
+    sess.run(tf.global_variables_initializer())
+    # except: # TF11
+    #     sess.run(tf.initialize_all_variables())
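# A small sketch of `merge_networks` above: it concatenates and de-duplicates
# the bookkeeping lists (`all_params`, `all_layers`, `all_drop`) of several
# branches so that one handle covers them all. Shapes and names are
# illustrative.
import tensorflow as tf
import tensorlayer as tl

x = tf.placeholder(tf.float32, (None, 16))
n1 = tl.layers.DenseLayer(tl.layers.InputLayer(x, name='in1'), 8, name='branch1')
n2 = tl.layers.DenseLayer(tl.layers.InputLayer(x, name='in2'), 8, name='branch2')
net = tl.layers.merge_networks([n1, n2])  # `net` keeps n1's outputs
print(len(net.all_params))  # 4: W and b from each of the two dense layers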
+ """ + + def __init__(self, inputs=None, name='layer'): + self.inputs = inputs + scope_name = tf.get_variable_scope().name + if scope_name: + name = scope_name + '/' + name + if (name in set_keep['_layers_name_list']) and set_keep['name_reuse'] == False: + raise Exception("Layer '%s' already exists, please choice other 'name' or reuse this layer\ + \nHint : Use different name for different 'Layer' (The name is used to control parameter sharing)\ + \nAdditional Informations: http://tensorlayer.readthedocs.io/en/latest/modules/layers.html?highlight=clear_layers_name#tensorlayer.layers.clear_layers_name" + % name) + else: + self.name = name + if name not in ['', None, False]: + set_keep['_layers_name_list'].append(name) + + def print_params(self, details=True, session=None): + """Print all info of parameters in the network""" + for i, p in enumerate(self.all_params): + if details: + try: + # logging.info(" param {:3}: {:15} (mean: {:<18}, median: {:<18}, std: {:<18}) {}".format(i, str(p.eval().shape), p.eval().mean(), np.median(p.eval()), p.eval().std(), p.name)) + val = p.eval(session=session) + logging.info(" param {:3}: {:20} {:15} {} (mean: {:<18}, median: {:<18}, std: {:<18}) ".format( + i, p.name, str(val.shape), p.dtype.name, val.mean(), np.median(val), val.std())) + except Exception as e: + logging.info(str(e)) + raise Exception("Hint: print params details after tl.layers.initialize_global_variables(sess) or use network.print_params(False).") + else: + logging.info(" param {:3}: {:20} {:15} {}".format(i, p.name, str(p.get_shape()), p.dtype.name)) + logging.info(" num of params: %d" % self.count_params()) + + def print_layers(self): + """Print all info of layers in the network""" + for i, layer in enumerate(self.all_layers): + # logging.info(" layer %d: %s" % (i, str(layer))) + logging.info(" layer {:3}: {:20} {:15} {}".format(i, layer.name, str(layer.get_shape()), layer.dtype.name)) + + def count_params(self): + """Return the number of parameters in the network""" + n_params = 0 + for i, p in enumerate(self.all_params): + n = 1 + # for s in p.eval().shape: + for s in p.get_shape(): + try: + s = int(s) + except: + s = 1 + if s: + n = n * s + n_params = n_params + n + return n_params + + def __str__(self): + # logging.info("\nIt is a Layer class") + # self.print_params(False) + # self.print_layers() + return " Last layer is: %s" % self.__class__.__name__ + + +class InputLayer(Layer): + """ + The :class:`InputLayer` class is the starting layer of a neural network. + + Parameters + ---------- + inputs : placeholder or tensor + The input of a network. + name : str + A unique layer name. + + """ + + def __init__(self, inputs=None, name='input'): + Layer.__init__(self, inputs=inputs, name=name) + logging.info("InputLayer %s: %s" % (self.name, inputs.get_shape())) + self.outputs = inputs + self.all_layers = [] + self.all_params = [] + self.all_drop = {} + + +class OneHotInputLayer(Layer): + """ + The :class:`OneHotInputLayer` class is the starting layer of a neural network, see ``tf.one_hot``. + + Parameters + ---------- + inputs : placeholder or tensor + The input of a network. + depth : None or int + If the input indices is rank N, the output will have rank N+1. The new axis is created at dimension `axis` (default: the new axis is appended at the end). + on_value : None or number + The value to represnt `ON`. If None, it will default to the value 1. + off_value : None or number + The value to represnt `OFF`. If None, it will default to the value 0. + axis : None or int + The axis. 
+    dtype : None or TensorFlow dtype
+        The data type, None means tf.float32.
+    name : str
+        A unique layer name.
+
+    """
+
+    def __init__(self, inputs=None, depth=None, on_value=None, off_value=None, axis=None, dtype=None, name='input'):
+        Layer.__init__(self, inputs=inputs, name=name)
+        logging.info("OneHotInputLayer %s: %s" % (self.name, inputs.get_shape()))
+        # assert depth != None, "depth is not given"
+        if depth is None:
+            logging.info("  [*] depth is None, so the number of output units is undefined")
+        self.outputs = tf.one_hot(inputs, depth, on_value=on_value, off_value=off_value, axis=axis, dtype=dtype)
+        self.all_layers = []
+        self.all_params = []
+        self.all_drop = {}
+
+
+class Word2vecEmbeddingInputlayer(Layer):
+    """
+    The :class:`Word2vecEmbeddingInputlayer` class is a fully connected layer.
+    For Word Embedding, words are input as integer index.
+    The output is the embedded word vector.
+
+    Parameters
+    ----------
+    inputs : placeholder or tensor
+        The input of a network. For word inputs, please use integer index format, 2D tensor : [batch_size, num_steps(num_words)]
+    train_labels : placeholder
+        The word labels in integer index format.
+    vocabulary_size : int
+        The size of the vocabulary (number of words).
+    embedding_size : int
+        The number of embedding dimensions.
+    num_sampled : int
+        The number of negative examples for NCE loss.
+    nce_loss_args : dictionary
+        The arguments for tf.nn.nce_loss().
+    E_init : initializer
+        The initializer for initializing the embedding matrix.
+    E_init_args : dictionary
+        The arguments for embedding initializer.
+    nce_W_init : initializer
+        The initializer for initializing the nce decoder weight matrix.
+    nce_W_init_args : dictionary
+        The arguments for initializing the nce decoder weight matrix.
+    nce_b_init : initializer
+        The initializer for initializing the nce decoder bias vector.
+    nce_b_init_args : dictionary
+        The arguments for initializing the nce decoder bias vector.
+    name : str
+        A unique layer name.
+
+    Attributes
+    ----------
+    nce_cost : Tensor
+        The NCE loss.
+    outputs : Tensor
+        The embedding layer outputs.
+    normalized_embeddings : Tensor
+        Normalized embedding matrix.
+
+    Examples
+    --------
+    With TensorLayer : see ``tensorlayer/example/tutorial_word2vec_basic.py``
+
+    >>> train_inputs = tf.placeholder(tf.int32, shape=(batch_size))
+    >>> train_labels = tf.placeholder(tf.int32, shape=(batch_size, 1))
+    >>> emb_net = tl.layers.Word2vecEmbeddingInputlayer(
+    ...     inputs = train_inputs,
+    ...     train_labels = train_labels,
+    ...     vocabulary_size = vocabulary_size,
+    ...     embedding_size = embedding_size,
+    ...     num_sampled = num_sampled,
+    ...     name ='word2vec',
+    ... )
+    >>> cost = emb_net.nce_cost
+    >>> train_params = emb_net.all_params
+    >>> train_op = tf.train.GradientDescentOptimizer(learning_rate).minimize(
+    ...     cost, var_list=train_params)
+    >>> normalized_embeddings = emb_net.normalized_embeddings
+
+    Without TensorLayer : see ``tensorflow/examples/tutorials/word2vec/word2vec_basic.py``
+
+    >>> train_inputs = tf.placeholder(tf.int32, shape=(batch_size))
+    >>> train_labels = tf.placeholder(tf.int32, shape=(batch_size, 1))
+    >>> embeddings = tf.Variable(
+    ...     tf.random_uniform([vocabulary_size, embedding_size], -1.0, 1.0))
+    >>> embed = tf.nn.embedding_lookup(embeddings, train_inputs)
+    >>> nce_weights = tf.Variable(
+    ...     tf.truncated_normal([vocabulary_size, embedding_size],
+    ...                         stddev=1.0 / math.sqrt(embedding_size)))
+    >>> nce_biases = tf.Variable(tf.zeros([vocabulary_size]))
+    >>> cost = tf.reduce_mean(
+    ...     tf.nn.nce_loss(weights=nce_weights, biases=nce_biases,
+    ...                    inputs=embed, labels=train_labels,
+    ...                    num_sampled=num_sampled, num_classes=vocabulary_size,
+    ...                    num_true=1))
+
+    References
+    ----------
+    `tensorflow/examples/tutorials/word2vec/word2vec_basic.py `__
+
+    """
+
+    def __init__(
+            self,
+            inputs=None,
+            train_labels=None,
+            vocabulary_size=80000,
+            embedding_size=200,
+            num_sampled=64,
+            nce_loss_args={},
+            E_init=tf.random_uniform_initializer(minval=-1.0, maxval=1.0),
+            E_init_args={},
+            nce_W_init=tf.truncated_normal_initializer(stddev=0.03),
+            nce_W_init_args={},
+            nce_b_init=tf.constant_initializer(value=0.0),
+            nce_b_init_args={},
+            name='word2vec',
+    ):
+        Layer.__init__(self, name=name)
+        self.inputs = inputs
+        logging.info("Word2vecEmbeddingInputlayer %s: (%d, %d)" % (self.name, vocabulary_size, embedding_size))
+        # Look up embeddings for inputs.
+        # Note: a row of 'embeddings' is the vector representation of a word.
+        # for the sake of speed, it is better to slice the embedding matrix
+        # instead of transferring a word id to a one-hot-format vector and then
+        # multiplying by the embedding matrix.
+        # embed is the outputs of the hidden layer (embedding layer), it is a
+        # row vector with 'embedding_size' values.
+        with tf.variable_scope(name) as vs:
+            embeddings = tf.get_variable(name='embeddings', shape=(vocabulary_size, embedding_size), initializer=E_init, dtype=D_TYPE, **E_init_args)
+            embed = tf.nn.embedding_lookup(embeddings, self.inputs)
+            # Construct the variables for the NCE loss (i.e. negative sampling)
+            nce_weights = tf.get_variable(name='nce_weights', shape=(vocabulary_size, embedding_size), initializer=nce_W_init, dtype=D_TYPE, **nce_W_init_args)
+            nce_biases = tf.get_variable(name='nce_biases', shape=(vocabulary_size), initializer=nce_b_init, dtype=D_TYPE, **nce_b_init_args)
+
+        # Compute the average NCE loss for the batch.
+        # tf.nce_loss automatically draws a new sample of the negative labels
+        # each time we evaluate the loss.
+        self.nce_cost = tf.reduce_mean(
+            tf.nn.nce_loss(
+                weights=nce_weights,
+                biases=nce_biases,
+                inputs=embed,
+                labels=train_labels,
+                num_sampled=num_sampled,
+                num_classes=vocabulary_size,
+                **nce_loss_args))
+
+        self.outputs = embed
+        self.normalized_embeddings = tf.nn.l2_normalize(embeddings, 1)
+
+        self.all_layers = [self.outputs]
+        self.all_params = [embeddings, nce_weights, nce_biases]
+        self.all_drop = {}
+
+
+class EmbeddingInputlayer(Layer):
+    """
+    The :class:`EmbeddingInputlayer` class is a look-up table for word embedding.
+
+    Words are accessed by integer indices, and the output is the embedded word vector.
+    To train a word embedding matrix, you can use :class:`Word2vecEmbeddingInputlayer`.
+    If you have a pre-trained matrix, you can assign its parameters to this layer.
+
+    Parameters
+    ----------
+    inputs : placeholder
+        The input of a network. For word inputs.
+        Please use integer index format, 2D tensor : (batch_size, num_steps(num_words)).
+    vocabulary_size : int
+        The size of vocabulary, number of words.
+    embedding_size : int
+        The number of embedding dimensions.
+    E_init : initializer
+        The initializer for the embedding matrix.
+    E_init_args : dictionary
+        The arguments for embedding matrix initializer.
+    name : str
+        A unique layer name.
+
+    Attributes
+    ----------
+    outputs : tensor
+        The embedding layer output is a 3D tensor in the shape: (batch_size, num_steps(num_words), embedding_size).
+
+    Examples
+    --------
+    >>> x = tf.placeholder(tf.int32, shape=(batch_size,))
+    >>> emb_net = tl.layers.EmbeddingInputlayer(
+    ...     inputs = x,
+    ...     vocabulary_size = vocabulary_size,
+    ...     embedding_size = embedding_size,
+    ...     name ='embed')
+
+    """
+
+    def __init__(
+            self,
+            inputs=None,
+            vocabulary_size=80000,
+            embedding_size=200,
+            E_init=tf.random_uniform_initializer(-0.1, 0.1),
+            E_init_args={},
+            name='embedding',
+    ):
+        Layer.__init__(self, name=name)
+        self.inputs = inputs
+        logging.info("EmbeddingInputlayer %s: (%d, %d)" % (self.name, vocabulary_size, embedding_size))
+
+        with tf.variable_scope(name) as vs:
+            embeddings = tf.get_variable(name='embeddings', shape=(vocabulary_size, embedding_size), initializer=E_init, dtype=D_TYPE, **E_init_args)
+            embed = tf.nn.embedding_lookup(embeddings, self.inputs)
+
+        self.outputs = embed
+
+        self.all_layers = [self.outputs]
+        self.all_params = [embeddings]
+        self.all_drop = {}
+
+
+class AverageEmbeddingInputlayer(Layer):
+    """The :class:`AverageEmbeddingInputlayer` averages over embeddings of inputs.
+    This is often used as the input layer for models like DAN[1] and FastText[2].
+
+    Parameters
+    ----------
+    inputs : placeholder or tensor
+        The network input.
+        For word inputs, please use integer index format, 2D tensor: (batch_size, num_steps(num_words)).
+    vocabulary_size : int
+        The size of vocabulary.
+    embedding_size : int
+        The dimension of the embedding vectors.
+    pad_value : int
+        The scalar padding value used in inputs, 0 as default.
+    embeddings_initializer : initializer
+        The initializer of the embedding matrix.
+    embeddings_kwargs : None or dictionary
+        The arguments to get embedding matrix variable.
+    name : str
+        A unique layer name.
+
+    References
+    ----------
+    - [1] Iyyer, M., Manjunatha, V., Boyd-Graber, J., & Daumé III, H. (2015). Deep Unordered Composition Rivals Syntactic Methods for Text Classification. In Association for Computational Linguistics.
+    - [2] Joulin, A., Grave, E., Bojanowski, P., & Mikolov, T. (2016). `Bag of Tricks for Efficient Text Classification.
+      `__
+
+    """
+
+    def __init__(
+            self,
+            inputs,
+            vocabulary_size,
+            embedding_size,
+            pad_value=0,
+            embeddings_initializer=tf.random_uniform_initializer(-0.1, 0.1),
+            embeddings_kwargs=None,
+            name='average_embedding',
+    ):
+        super().__init__(name=name)
+
+        # if embeddings_kwargs is None:
+        #     embeddings_kwargs = {}
+
+        if inputs.get_shape().ndims != 2:
+            raise ValueError('inputs must be of size batch_size * batch_sentence_length')
+
+        self.inputs = inputs
+
+        logging.info("AverageEmbeddingInputlayer %s: (%d, %d)" % (name, vocabulary_size, embedding_size))
+        with tf.variable_scope(name):
+            self.embeddings = tf.get_variable(
+                name='embeddings',
+                shape=(vocabulary_size, embedding_size),
+                initializer=embeddings_initializer,
+                dtype=D_TYPE,
+                **(embeddings_kwargs or {})
+                # **embeddings_kwargs
+            )  # **(embeddings_kwargs or {}),
+
+            word_embeddings = tf.nn.embedding_lookup(
+                self.embeddings,
+                self.inputs,
+                name='word_embeddings',
+            )
+            # Zero out embeddings of pad value
+            masks = tf.not_equal(self.inputs, pad_value, name='masks')
+            word_embeddings *= tf.cast(
+                tf.expand_dims(masks, axis=-1),
+                # tf.float32,
+                dtype=D_TYPE,
+            )
+            sum_word_embeddings = tf.reduce_sum(word_embeddings, axis=1)
+
+            # Count number of non-padding words in each sentence
+            sentence_lengths = tf.count_nonzero(
+                masks,
+                axis=1,
+                keep_dims=True,
+                # dtype=tf.float32,
+                dtype=D_TYPE,
+                name='sentence_lengths',
+            )
+
+            sentence_embeddings = tf.divide(
+                sum_word_embeddings,
+                sentence_lengths + 1e-8,  # Add epsilon to avoid dividing by 0
+                name='sentence_embeddings')
+
+        self.outputs = sentence_embeddings
+        self.all_layers = [self.outputs]
+        self.all_params = [self.embeddings]
+        self.all_drop = {}
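# A minimal sketch of `AverageEmbeddingInputlayer` above as a FastText-style
# sentence encoder: padded positions (pad_value=0) are masked out before the
# average. Vocabulary size, sentence length and names are illustrative.
import tensorflow as tf
import tensorlayer as tl

sentences = tf.placeholder(tf.int64, (None, 20), name='sentences')  # padded word ids
net = tl.layers.AverageEmbeddingInputlayer(
    sentences, vocabulary_size=10000, embedding_size=50, pad_value=0, name='avg_embed')
net = tl.layers.DenseLayer(net, n_units=2, act=tf.identity, name='sent_logits')
print(net.outputs.get_shape())  # (?, 2)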
+ + """ + + def __init__( + self, + layer, + n_units=100, + act=tf.identity, + W_init=tf.truncated_normal_initializer(stddev=0.1), + b_init=tf.constant_initializer(value=0.0), + W_init_args={}, + b_init_args={}, + name='dense', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + if self.inputs.get_shape().ndims != 2: + raise Exception("The input dimension must be rank 2, please reshape or flatten it") + + n_in = int(self.inputs.get_shape()[-1]) + self.n_units = n_units + logging.info("DenseLayer %s: %d %s" % (self.name, self.n_units, act.__name__)) + with tf.variable_scope(name) as vs: + W = tf.get_variable(name='W', shape=(n_in, n_units), initializer=W_init, dtype=D_TYPE, **W_init_args) + if b_init is not None: + try: + b = tf.get_variable(name='b', shape=(n_units), initializer=b_init, dtype=D_TYPE, **b_init_args) + except: # If initializer is a constant, do not specify shape. + b = tf.get_variable(name='b', initializer=b_init, dtype=D_TYPE, **b_init_args) + self.outputs = act(tf.matmul(self.inputs, W) + b) + else: + self.outputs = act(tf.matmul(self.inputs, W)) + + # Hint : list(), dict() is pass by value (shallow), without them, it is + # pass by reference. + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) + if b_init is not None: + self.all_params.extend([W, b]) + else: + self.all_params.extend([W]) + + +class ReconLayer(DenseLayer): + """A reconstruction layer for :class:`DenseLayer` to implement AutoEncoder. + + It is often used to pre-train the previous :class:`DenseLayer` + + Parameters + ---------- + layer : :class:`Layer` + Previous layer. + x_recon : placeholder or tensor + The target for reconstruction. + n_units : int + The number of units of the layer. It should equal ``x_recon``. + act : activation function + The activation function of this layer. + Normally, for sigmoid layer, the reconstruction activation is ``sigmoid``; + for rectifying layer, the reconstruction activation is ``softplus``. + name : str + A unique layer name. + + Examples + -------- + >>> net = tl.layers.InputLayer(x, name='input_layer') + >>> net = tl.layers.DenseLayer(net, n_units=196, + ... act=tf.nn.sigmoid, name='sigmoid1') + >>> recon_layer1 = tl.layers.ReconLayer(net, x_recon=x, n_units=784, + ... act=tf.nn.sigmoid, name='recon_layer1') + >>> recon_layer1.pretrain(sess, x=x, X_train=X_train, X_val=X_val, + ... denoise_name=None, n_epoch=1200, batch_size=128, + ... print_freq=10, save=True, save_name='w1pre_') + + Methods + ------- + pretrain(sess, x, X_train, X_val, denoise_name=None, n_epoch=100, batch_size=128, print_freq=10, save=True, save_name='w1pre') + Start to pre-train the parameters of the previous DenseLayer. + + Notes + ----- + The input layer should be `DenseLayer` or a layer that has only one axes. + You may need to modify this part to define your own cost function. + By default, the cost is implemented as follow: + - For sigmoid layer, the implementation can be `UFLDL `__ + - For rectifying layer, the implementation can be `Glorot (2011). 
+      Deep Sparse Rectifier Neural Networks `__
+
+    """
+
+    def __init__(
+            self,
+            layer,
+            x_recon=None,
+            n_units=784,
+            act=tf.nn.softplus,
+            name='recon',
+    ):
+        DenseLayer.__init__(self, layer=layer, n_units=n_units, act=act, name=name)
+        logging.info("%s is a ReconLayer" % self.name)
+
+        # y : reconstruction outputs; train_params : parameters to train
+        # Note that: train_params = [W_encoder, b_encoder, W_decoder, b_decoder]
+        y = self.outputs
+        self.train_params = self.all_params[-4:]
+
+        # =====================================================================
+        #
+        # You need to modify the below cost function and optimizer so as to
+        # implement your own pre-train method.
+        #
+        # =====================================================================
+        lambda_l2_w = 0.004
+        learning_rate = 0.0001
+        logging.info("     lambda_l2_w: %f" % lambda_l2_w)
+        logging.info("     learning_rate: %f" % learning_rate)
+
+        # Mean-square-error i.e. quadratic-cost
+        mse = tf.reduce_sum(tf.squared_difference(y, x_recon), 1)
+        mse = tf.reduce_mean(mse)  # in theano: mse = ((y - x) ** 2 ).sum(axis=1).mean()
+        # mse = tf.reduce_mean(tf.reduce_sum(tf.square(tf.sub(y, x_recon)), 1))
+        # mse = tf.reduce_mean(tf.squared_difference(y, x_recon))  # : Error
+        # mse = tf.sqrt(tf.reduce_mean(tf.square(y - x_recon)))    # : Error
+        # Cross-entropy
+        # ce = cost.cross_entropy(y, x_recon)                                               # : list , list , Error (only be used for softmax output)
+        # ce = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(y, x_recon))          # : list , list , Error (only be used for softmax output)
+        # ce = tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(y, x_recon))   # : list , index , Error (only be used for softmax output)
+        L2_w = tf.contrib.layers.l2_regularizer(lambda_l2_w)(self.train_params[0]) \
+               + tf.contrib.layers.l2_regularizer(lambda_l2_w)(self.train_params[2])  # faster than the code below
+        # L2_w = lambda_l2_w * tf.reduce_mean(tf.square(self.train_params[0])) + lambda_l2_w * tf.reduce_mean( tf.square(self.train_params[2]))
+        # DropNeuro
+        P_o = cost.lo_regularizer(0.03)(
+            self.train_params[0])  # + cost.lo_regularizer(0.5)(self.train_params[2])    # : if add lo on decoder, no neuron will be broken
+        P_i = cost.li_regularizer(0.03)(self.train_params[0])  # + cost.li_regularizer(0.001)(self.train_params[2])
+
+        # L1 of activation outputs
+        activation_out = self.all_layers[-2]
+        L1_a = 0.001 * tf.reduce_mean(activation_out)  # : theano: T.mean( self.a[i] )         # some neuron are broken, white and black
+        # L1_a = 0.001 * tf.reduce_mean( tf.reduce_sum(activation_out, 0) )         # : some neuron are broken, white and black
+        # L1_a = 0.001 * 100 * tf.reduce_mean( tf.reduce_sum(activation_out, 1) )   # : some neuron are broken, white and black
+        # KL Divergence
+        beta = 4
+        rho = 0.15
+        p_hat = tf.reduce_mean(activation_out, 0)  # theano: p_hat = T.mean( self.a[i], axis=0 )
+        try:  # TF1.0
+            KLD = beta * tf.reduce_sum(rho * tf.log(tf.divide(rho, p_hat)) + (1 - rho) * tf.log((1 - rho) / (tf.subtract(float(1), p_hat))))
+        except:  # TF0.12
+            KLD = beta * tf.reduce_sum(rho * tf.log(tf.div(rho, p_hat)) + (1 - rho) * tf.log((1 - rho) / (tf.sub(float(1), p_hat))))
+            # KLD = beta * tf.reduce_sum( rho * tf.log(rho/ p_hat) + (1- rho) * tf.log((1- rho)/(1- p_hat)) )
+            # theano: L1_a = l1_a[i] * T.sum( rho[i] * T.log(rho[i]/ p_hat) + (1- rho[i]) * T.log((1- rho[i])/(1- p_hat)) )
+        # Total cost
+        if act == tf.nn.softplus:
+            logging.info('     use: mse, L2_w, L1_a')
+            self.cost = mse + L1_a + L2_w
+        elif act == tf.nn.sigmoid:
+            # ----------------------------------------------------
+            # Cross-entropy was used in Denoising AE
+            # logging.info('     use: ce, L2_w, KLD')
+            # self.cost = ce + L2_w + KLD
+            # ----------------------------------------------------
+            # Mean-squared-error was used in Vanilla AE
+            logging.info('     use: mse, L2_w, KLD')
+            self.cost = mse + L2_w + KLD
+            # ----------------------------------------------------
+            # Add DropNeuro penalty (P_o) can remove neurons of AE
+            # logging.info('     use: mse, L2_w, KLD, P_o')
+            # self.cost = mse + L2_w + KLD + P_o
+            # ----------------------------------------------------
+            # Add DropNeuro penalty (P_i) can remove neurons of previous layer
+            #   If previous layer is InputLayer, it means remove useless features
+            # logging.info('     use: mse, L2_w, KLD, P_i')
+            # self.cost = mse + L2_w + KLD + P_i
+        else:
+            raise Exception("Don't support the given reconstruct activation function")
+
+        self.train_op = tf.train.AdamOptimizer(
+            learning_rate, beta1=0.9, beta2=0.999, epsilon=1e-08, use_locking=False).minimize(
+                self.cost, var_list=self.train_params)
+        # self.train_op = tf.train.GradientDescentOptimizer(1.0).minimize(self.cost, var_list=self.train_params)
+
+    def pretrain(self, sess, x, X_train, X_val, denoise_name=None, n_epoch=100, batch_size=128, print_freq=10, save=True, save_name='w1pre_'):
+        # ====================================================
+        #
+        # You need to modify the cost function in __init__() so as to
+        # get your own pre-train method.
+        #
+        # ====================================================
+        logging.info("     [*] %s start pretrain" % self.name)
+        logging.info("     batch_size: %d" % batch_size)
+        if denoise_name:
+            logging.info("     denoising layer keep: %f" % self.all_drop[set_keep[denoise_name]])
+            dp_denoise = self.all_drop[set_keep[denoise_name]]
+        else:
+            logging.info("     no denoising layer")
+
+        for epoch in range(n_epoch):
+            start_time = time.time()
+            for X_train_a, _ in iterate.minibatches(X_train, X_train, batch_size, shuffle=True):
+                dp_dict = utils.dict_to_one(self.all_drop)
+                if denoise_name:
+                    dp_dict[set_keep[denoise_name]] = dp_denoise
+                feed_dict = {x: X_train_a}
+                feed_dict.update(dp_dict)
+                sess.run(self.train_op, feed_dict=feed_dict)
+
+            if epoch + 1 == 1 or (epoch + 1) % print_freq == 0:
+                logging.info("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time))
+                train_loss, n_batch = 0, 0
+                for X_train_a, _ in iterate.minibatches(X_train, X_train, batch_size, shuffle=True):
+                    dp_dict = utils.dict_to_one(self.all_drop)
+                    feed_dict = {x: X_train_a}
+                    feed_dict.update(dp_dict)
+                    err = sess.run(self.cost, feed_dict=feed_dict)
+                    train_loss += err
+                    n_batch += 1
+                logging.info("   train loss: %f" % (train_loss / n_batch))
+                val_loss, n_batch = 0, 0
+                for X_val_a, _ in iterate.minibatches(X_val, X_val, batch_size, shuffle=True):
+                    dp_dict = utils.dict_to_one(self.all_drop)
+                    feed_dict = {x: X_val_a}
+                    feed_dict.update(dp_dict)
+                    err = sess.run(self.cost, feed_dict=feed_dict)
+                    val_loss += err
+                    n_batch += 1
+                logging.info("   val loss: %f" % (val_loss / n_batch))
+                if save:
+                    try:
+                        visualize.draw_weights(
+                            self.train_params[0].eval(), second=10, saveable=True, shape=[28, 28], name=save_name + str(epoch + 1), fig_idx=2012)
+                        files.save_npz([self.all_params[0]], name=save_name + str(epoch + 1) + '.npz')
+                    except:
+                        raise Exception(
+                            "You should change visualize.draw_weights() in ReconLayer.pretrain() if you want to save the feature images for a different dataset")
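# The sparsity penalty from ReconLayer.__init__ above, restated in NumPy.
# `p_hat` is the mean activation of each hidden unit over a batch; the KL
# term vanishes where p_hat equals the target sparsity rho. The values of
# p_hat below are illustrative.
import numpy as np

beta, rho = 4.0, 0.15
p_hat = np.array([0.15, 0.05, 0.60])  # per-unit mean activations
kld = beta * np.sum(rho * np.log(rho / p_hat)
                    + (1 - rho) * np.log((1 - rho) / (1 - p_hat)))
# The first unit (p_hat == rho) contributes 0; the other two are penalized.
print(kld)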
+class DropoutLayer(Layer):
+    """
+    The :class:`DropoutLayer` class is a noise layer which randomly sets
+    activations to zero according to a keeping probability.
+
+    Parameters
+    ----------
+    layer : :class:`Layer`
+        Previous layer.
+    keep : float
+        The keeping probability: the lower it is, the more activations are set to zero.
+    is_fix : boolean
+        Whether to fix the keeping probability. Default is False.
+        If True, the keeping probability is fixed and cannot be changed via `feed_dict`.
+    is_train : boolean
+        Training mode or not. If False, skip this layer. Default is True.
+    seed : int or None
+        The seed for random dropout.
+    name : str
+        A unique layer name.
+
+    Examples
+    --------
+    Method 1: Using ``all_drop`` see `tutorial_mlp_dropout1.py `__
+
+    >>> net = tl.layers.InputLayer(x, name='input_layer')
+    >>> net = tl.layers.DropoutLayer(net, keep=0.8, name='drop1')
+    >>> net = tl.layers.DenseLayer(net, n_units=800, act=tf.nn.relu, name='relu1')
+    >>> ...
+    >>> # For training, enable dropout as follow.
+    >>> feed_dict = {x: X_train_a, y_: y_train_a}
+    >>> feed_dict.update( net.all_drop )     # enable noise layers
+    >>> sess.run(train_op, feed_dict=feed_dict)
+    >>> ...
+    >>> # For testing, disable dropout as follow.
+    >>> dp_dict = tl.utils.dict_to_one( net.all_drop ) # disable noise layers
+    >>> feed_dict = {x: X_val_a, y_: y_val_a}
+    >>> feed_dict.update(dp_dict)
+    >>> err, ac = sess.run([cost, acc], feed_dict=feed_dict)
+    >>> ...
+
+    Method 2: Without using ``all_drop`` see `tutorial_mlp_dropout2.py `__
+
+    >>> def mlp(x, is_train=True, reuse=False):
+    >>>     with tf.variable_scope("MLP", reuse=reuse):
+    >>>         tl.layers.set_name_reuse(reuse)
+    >>>         net = tl.layers.InputLayer(x, name='input')
+    >>>         net = tl.layers.DropoutLayer(net, keep=0.8, is_fix=True,
+    >>>                                      is_train=is_train, name='drop1')
+    >>>         ...
+    >>>         return net
+    >>> # define inferences
+    >>> net_train = mlp(x, is_train=True, reuse=False)
+    >>> net_test = mlp(x, is_train=False, reuse=True)
+
+    """
+
+    def __init__(
+            self,
+            layer,
+            keep=0.5,
+            is_fix=False,
+            is_train=True,
+            seed=None,
+            name='dropout_layer',
+    ):
+        Layer.__init__(self, name=name)
+        if is_train is False:
+            logging.info("skip DropoutLayer")
+            self.outputs = layer.outputs
+            self.all_layers = list(layer.all_layers)
+            self.all_params = list(layer.all_params)
+            self.all_drop = dict(layer.all_drop)
+        else:
+            self.inputs = layer.outputs
+            logging.info("DropoutLayer %s: keep:%f is_fix:%s" % (self.name, keep, is_fix))
+
+            # The name of placeholder for keep_prob is the same with the name
+            # of the Layer.
+            if is_fix:
+                self.outputs = tf.nn.dropout(self.inputs, keep, seed=seed, name=name)
+            else:
+                set_keep[name] = tf.placeholder(tf.float32)
+                self.outputs = tf.nn.dropout(self.inputs, set_keep[name], seed=seed, name=name)  # 1.2
+
+            self.all_layers = list(layer.all_layers)
+            self.all_params = list(layer.all_params)
+            self.all_drop = dict(layer.all_drop)
+            if is_fix is False:
+                self.all_drop.update({set_keep[name]: keep})
+            self.all_layers.extend([self.outputs])
+
+        # logging.info(set_keep[name])
+        #    Tensor("Placeholder_2:0", dtype=float32)
+        # logging.info(denoising1)
+        #    Tensor("Placeholder_2:0", dtype=float32)
+        # logging.info(self.all_drop[denoising1])
+        #    0.8
+        #
+        # https://www.tensorflow.org/versions/r0.8/tutorials/mnist/tf/index.html
+        # The optional feed_dict argument allows the caller to override the
+        # value of tensors in the graph. Each key in feed_dict can be one of
+        # the following types:
+        # If the key is a Tensor, the value may be a Python scalar, string,
+        # list, or numpy ndarray that can be converted to the same dtype as that
+        # tensor.
Additionally, if the key is a placeholder, the shape of the + # value will be checked for compatibility with the placeholder. + # If the key is a SparseTensor, the value should be a SparseTensorValue. + + +class GaussianNoiseLayer(Layer): + """ + The :class:`GaussianNoiseLayer` class is noise layer that adding noise with + gaussian distribution to the activation. + + Parameters + ------------ + layer : :class:`Layer` + Previous layer. + mean : float + The mean. Default is 0. + stddev : float + The standard deviation. Default is 1. + is_train : boolean + Is trainable layer. If False, skip this layer. default is True. + seed : int or None + The seed for random noise. + name : str + A unique layer name. + + """ + + def __init__( + self, + layer, + mean=0.0, + stddev=1.0, + is_train=True, + seed=None, + name='gaussian_noise_layer', + ): + Layer.__init__(self, name=name) + if is_train is False: + logging.info("skip GaussianNoiseLayer") + self.outputs = layer.outputs + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + else: + self.inputs = layer.outputs + logging.info("GaussianNoiseLayer %s: mean:%f stddev:%f" % (self.name, mean, stddev)) + with tf.variable_scope(name) as vs: + # noise = np.random.normal(0.0 , sigma , tf.to_int64(self.inputs).get_shape()) + noise = tf.random_normal(shape=self.inputs.get_shape(), mean=mean, stddev=stddev, seed=seed) + self.outputs = self.inputs + noise + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + + +class DropconnectDenseLayer(Layer): + """ + The :class:`DropconnectDenseLayer` class is :class:`DenseLayer` with DropConnect + behaviour which randomly removes connections between this layer and the previous + layer according to a keeping probability. + + Parameters + ---------- + layer : :class:`Layer` + Previous layer. + keep : float + The keeping probability. + The lower the probability it is, the more activations are set to zero. + n_units : int + The number of units of this layer. + act : activation function + The activation function of this layer. + W_init : weights initializer + The initializer for the weight matrix. + b_init : biases initializer + The initializer for the bias vector. + W_init_args : dictionary + The arguments for the weight matrix initializer. + b_init_args : dictionary + The arguments for the bias vector initializer. + name : str + A unique layer name. + + Examples + -------- + >>> net = tl.layers.InputLayer(x, name='input_layer') + >>> net = tl.layers.DropconnectDenseLayer(net, keep=0.8, + ... n_units=800, act=tf.nn.relu, name='relu1') + >>> net = tl.layers.DropconnectDenseLayer(net, keep=0.5, + ... n_units=800, act=tf.nn.relu, name='relu2') + >>> net = tl.layers.DropconnectDenseLayer(net, keep=0.5, + ... n_units=10, name='output') + + References + ---------- + - `Wan, L. (2013). 
Regularization of neural networks using dropconnect `__
+
+    """
+
+    def __init__(
+            self,
+            layer,
+            keep=0.5,
+            n_units=100,
+            act=tf.identity,
+            W_init=tf.truncated_normal_initializer(stddev=0.1),
+            b_init=tf.constant_initializer(value=0.0),
+            W_init_args={},
+            b_init_args={},
+            name='dropconnect_layer',
+    ):
+        Layer.__init__(self, name=name)
+        self.inputs = layer.outputs
+        if self.inputs.get_shape().ndims != 2:
+            raise Exception("The input dimension must be rank 2")
+        n_in = int(self.inputs.get_shape()[-1])
+        self.n_units = n_units
+        logging.info("DropconnectDenseLayer %s: %d %s" % (self.name, self.n_units, act.__name__))
+
+        with tf.variable_scope(name) as vs:
+            W = tf.get_variable(name='W', shape=(n_in, n_units), initializer=W_init, dtype=D_TYPE, **W_init_args)
+            b = tf.get_variable(name='b', shape=(n_units, ), initializer=b_init, dtype=D_TYPE, **b_init_args)
+            # DropConnect: apply dropout to the weight matrix itself, then
+            # compute the layer output with the dropped weights.
+            set_keep[name] = tf.placeholder(tf.float32)
+            W_dropcon = tf.nn.dropout(W, set_keep[name])
+            self.outputs = act(tf.matmul(self.inputs, W_dropcon) + b)
+
+        self.all_layers = list(layer.all_layers)
+        self.all_params = list(layer.all_params)
+        self.all_drop = dict(layer.all_drop)
+        self.all_drop.update({set_keep[name]: keep})
+        self.all_layers.extend([self.outputs])
+        self.all_params.extend([W, b])
diff --git a/tensorlayer/layers/extend.py b/tensorlayer/layers/extend.py
new file mode 100644
index 00000000..f0585291
--- /dev/null
+++ b/tensorlayer/layers/extend.py
@@ -0,0 +1,77 @@
+# -*- coding: utf-8 -*-
+
+from .core import *
+
+
+class ExpandDimsLayer(Layer):
+    """
+    The :class:`ExpandDimsLayer` class inserts a dimension of 1 into a tensor's shape,
+    see `tf.expand_dims() `__ .
+
+    Parameters
+    ----------
+    layer : :class:`Layer`
+        The previous layer.
+    axis : int
+        The dimension index at which to expand the shape of input.
+    name : str
+        A unique layer name.
+
+    """
+
+    def __init__(
+            self,
+            layer,
+            axis,
+            name='expand_dims',
+    ):
+        Layer.__init__(self, name=name)
+        self.inputs = layer.outputs
+
+        logging.info("ExpandDimsLayer %s: axis:%d" % (self.name, axis))
+        with tf.variable_scope(name) as vs:
+            try:  # TF12 TF1.0
+                self.outputs = tf.expand_dims(self.inputs, axis=axis)
+            except:  # TF11
+                self.outputs = tf.expand_dims(self.inputs, dim=axis)
+        self.all_layers = list(layer.all_layers)
+        self.all_params = list(layer.all_params)
+        self.all_drop = dict(layer.all_drop)
+        self.all_layers.extend([self.outputs])
+        # self.all_params.extend( variables )
+
+
+class TileLayer(Layer):
+    """
+    The :class:`TileLayer` class constructs a tensor by tiling a given tensor,
+    see `tf.tile() `__ .
+
+    Parameters
+    ----------
+    layer : :class:`Layer`
+        The previous layer.
+    multiples : tensor
+        Must be one of the following types: int32, int64.
+        1-D. Length must be the same as the number of dimensions in input.
+    name : str
+        A unique layer name.
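+
+    Examples
+    --------
+    A minimal sketch, tiling each row of a 2D input twice along the feature axis:
+
+    >>> x = tf.placeholder(tf.float32, shape=(None, 100))
+    >>> net = tl.layers.InputLayer(x, name='input')
+    >>> net = tl.layers.TileLayer(net, multiples=[1, 2], name='tile')
+    ... # net.outputs now has shape (?, 200)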
+ + """ + + def __init__( + self, + layer=None, + multiples=None, + name='tile', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + + logging.info("TileLayer %s: multiples:%s" % (self.name, multiples)) + with tf.variable_scope(name) as vs: + self.outputs = tf.tile(self.inputs, multiples=multiples) + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) + # self.all_params.extend( variables ) diff --git a/tensorlayer/layers/flow_control.py b/tensorlayer/layers/flow_control.py new file mode 100644 index 00000000..5ab1b017 --- /dev/null +++ b/tensorlayer/layers/flow_control.py @@ -0,0 +1,87 @@ +# -*- coding: utf-8 -*- + +from .core import * + + +class MultiplexerLayer(Layer): + """ + The :class:`MultiplexerLayer` selects inputs to be forwarded to output. + see `tutorial_mnist_multiplexer.py`. + + Parameters + ---------- + layers : a list of :class:`Layer` + The input layers. + name : str + A unique layer name. + + Attributes + ---------- + sel : placeholder + The placeholder takes an integer for selecting which layer to output. + + Examples + -------- + >>> x = tf.placeholder(tf.float32, shape=(None, 784), name='x') + >>> y_ = tf.placeholder(tf.int64, shape=(None, ), name='y_') + >>> # define the network + >>> net_in = tl.layers.InputLayer(x, name='input_layer') + >>> net_in = tl.layers.DropoutLayer(net_in, keep=0.8, name='drop1') + >>> # net 0 + >>> net_0 = tl.layers.DenseLayer(net_in, n_units=800, + ... act = tf.nn.relu, name='net0/relu1') + >>> net_0 = tl.layers.DropoutLayer(net_0, keep=0.5, name='net0/drop2') + >>> net_0 = tl.layers.DenseLayer(net_0, n_units=800, + ... act = tf.nn.relu, name='net0/relu2') + >>> # net 1 + >>> net_1 = tl.layers.DenseLayer(net_in, n_units=800, + ... act = tf.nn.relu, name='net1/relu1') + >>> net_1 = tl.layers.DropoutLayer(net_1, keep=0.8, name='net1/drop2') + >>> net_1 = tl.layers.DenseLayer(net_1, n_units=800, + ... act = tf.nn.relu, name='net1/relu2') + >>> net_1 = tl.layers.DropoutLayer(net_1, keep=0.8, name='net1/drop3') + >>> net_1 = tl.layers.DenseLayer(net_1, n_units=800, + ... act = tf.nn.relu, name='net1/relu3') + >>> # multiplexer + >>> net_mux = tl.layers.MultiplexerLayer(layer=[net_0, net_1], name='mux_layer') + >>> network = tl.layers.ReshapeLayer(net_mux, shape=(-1, 800), name='reshape_layer') # + >>> network = tl.layers.DropoutLayer(network, keep=0.5, name='drop3') + >>> # output layer + >>> network = tl.layers.DenseLayer(network, n_units=10, + ... act = tf.identity, name='output_layer') + + """ + + def __init__(self, layers, name='mux_layer'): + Layer.__init__(self, name=name) + self.n_inputs = len(layers) + + self.inputs = [] + for l in layers: + self.inputs.append(l.outputs) + try: # TF1.0 + all_inputs = tf.stack(self.inputs, name=name) # pack means concat a list of tensor in a new dim # 1.2 + except: + all_inputs = tf.pack(self.inputs, name=name) # pack means concat a list of tensor in a new dim # 1.2 + + logging.info("MultiplexerLayer %s: n_inputs:%d" % (self.name, self.n_inputs)) + + self.sel = tf.placeholder(tf.int32) + self.outputs = tf.gather(all_inputs, self.sel, name=name) # [sel, :, : ...] 
# 1.2 + + # logging.info(self.outputs, vars(self.outputs)) + # # tf.reshape(self.outputs, shape=) + # exit() + # the same with ConcatLayer + self.all_layers = list(layers[0].all_layers) + self.all_params = list(layers[0].all_params) + self.all_drop = dict(layers[0].all_drop) + + for i in range(1, len(layers)): + self.all_layers.extend(list(layers[i].all_layers)) + self.all_params.extend(list(layers[i].all_params)) + self.all_drop.update(dict(layers[i].all_drop)) + + self.all_layers = list_remove_repeat(self.all_layers) + self.all_params = list_remove_repeat(self.all_params) + # self.all_drop = list_remove_repeat(self.all_drop) diff --git a/tensorlayer/layers/importer.py b/tensorlayer/layers/importer.py new file mode 100644 index 00000000..9e108ea7 --- /dev/null +++ b/tensorlayer/layers/importer.py @@ -0,0 +1,213 @@ +# -*- coding: utf-8 -*- + +from tensorflow.python.util.deprecation import deprecated +from .core import * + + +class LambdaLayer(Layer): + """A layer that takes a user-defined function using TensorFlow Lambda. + + Parameters + ---------- + layer : :class:`Layer` + Previous layer. + fn : function + The function that applies to the outputs of previous layer. + fn_args : dictionary + The arguments for the function (option). + name : str + A unique layer name. + + Examples + --------- + Non-parametric case + + >>> x = tf.placeholder(tf.float32, shape=[None, 1], name='x') + >>> net = tl.layers.InputLayer(x, name='input') + >>> net = LambdaLayer(net, lambda x: 2*x, name='lambda') + + Parametric case, merge other wrappers into TensorLayer + + >>> from keras.layers import * + >>> from tensorlayer.layers import * + >>> def keras_block(x): + >>> x = Dropout(0.8)(x) + >>> x = Dense(800, activation='relu')(x) + >>> x = Dropout(0.5)(x) + >>> x = Dense(800, activation='relu')(x) + >>> x = Dropout(0.5)(x) + >>> logits = Dense(10, activation='linear')(x) + >>> return logits + >>> net = InputLayer(x, name='input') + >>> net = LambdaLayer(net, fn=keras_block, name='keras') + + """ + + def __init__( + self, + layer, + fn, + fn_args={}, + name='lambda_layer', + ): + Layer.__init__(self, name=name) + assert layer is not None + assert fn is not None + self.inputs = layer.outputs + logging.info("LambdaLayer %s" % self.name) + with tf.variable_scope(name) as vs: + self.outputs = fn(self.inputs, **fn_args) + variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) + self.all_params.extend(variables) + + +class SlimNetsLayer(Layer): + """A layer that merges TF-Slim models into TensorLayer. + + Models can be found in `slim-model `__, + see Inception V3 example on `Github `__. + + Parameters + ---------- + layer : :class:`Layer` + Previous layer. + slim_layer : a slim network function + The network you want to stack onto, end with ``return net, end_points``. + slim_args : dictionary + The arguments for the slim model. + name : str + A unique layer name. + + Notes + ----- + - The due to TF-Slim stores the layers as dictionary, the ``all_layers`` in this network is not in order ! Fortunately, the ``all_params`` are in order. 
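+
+    Examples
+    --------
+    A minimal sketch; ``slim_block`` is a hypothetical stand-in for a real slim
+    network (e.g. Inception V3) and must return ``net, end_points``, assuming a
+    2D placeholder ``x``:
+
+    >>> slim = tf.contrib.slim
+    >>> def slim_block(x):  # hypothetical toy slim model
+    >>>     net = slim.fully_connected(x, 800, scope='fc1')
+    >>>     return net, {'fc1': net}
+    >>> net = tl.layers.InputLayer(x, name='input')
+    >>> net = tl.layers.SlimNetsLayer(net, slim_layer=slim_block, name='tfslim_layer')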
+ + """ + + def __init__( + self, + layer, + slim_layer, + slim_args={}, + name='tfslim_layer', + ): + Layer.__init__(self, name=name) + assert slim_layer is not None + assert slim_args is not None + self.inputs = layer.outputs + logging.info("SlimNetsLayer %s: %s" % (self.name, slim_layer.__name__)) + + # with tf.variable_scope(name) as vs: + # net, end_points = slim_layer(self.inputs, **slim_args) + # slim_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) + + net, end_points = slim_layer(self.inputs, **slim_args) + + slim_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=name) + if slim_variables == []: + logging.info( + "No variables found under %s : the name of SlimNetsLayer should be matched with the begining of the ckpt file, see tutorial_inceptionV3_tfslim.py for more details" + % name) + + self.outputs = net + + slim_layers = [] + for v in end_points.values(): + # tf.contrib.layers.summaries.summarize_activation(v) + slim_layers.append(v) + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + + self.all_layers.extend(slim_layers) + self.all_params.extend(slim_variables) + + +@deprecated("2018-06-30", "This layer will be deprecated soon as :class:`LambdaLayer` can do the same thing.") +class KerasLayer(Layer): + """A layer to import Keras layers into TensorLayer. + + Example can be found here `tutorial_keras.py `__. + + Parameters + ---------- + layer : :class:`Layer` + Previous layer + keras_layer : function + A tensor in tensor out function for building model. + keras_args : dictionary + The arguments for the `keras_layer`. + name : str + A unique layer name. + + """ + + def __init__( + self, + layer, + keras_layer, + keras_args={}, + name='keras_layer', + ): + Layer.__init__(self, name=name) + assert layer is not None + assert keras_layer is not None + self.inputs = layer.outputs + logging.info("KerasLayer %s: %s" % (self.name, keras_layer)) + logging.info("This API will be removed, please use LambdaLayer instead.") + with tf.variable_scope(name) as vs: + self.outputs = keras_layer(self.inputs, **keras_args) + variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) + self.all_params.extend(variables) + + +@deprecated("2018-06-30", "This layer will be deprecated soon as :class:`LambdaLayer` can do the same thing.") +class EstimatorLayer(Layer): + """A layer that accepts a user-defined model. + + It is similar with :class:`KerasLayer`, see `tutorial_keras.py `__. + + Parameters + ---------- + layer : :class:`Layer` + Previous layer + model_fn : function + A tensor in tensor out function for building model. + args : dictionary + The arguments for the `model_fn`. + name : str + A unique layer name. 
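+
+    Examples
+    --------
+    A minimal sketch; ``my_fn`` is a hypothetical tensor-in tensor-out function,
+    not part of the library, assuming a 2D placeholder ``x``:
+
+    >>> def my_fn(x):  # hypothetical user model
+    >>>     return tf.layers.dense(x, 10, name='logits')
+    >>> net = tl.layers.InputLayer(x, name='input')
+    >>> net = tl.layers.EstimatorLayer(net, model_fn=my_fn, name='estimator_layer')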
+ + """ + + def __init__( + self, + layer, + model_fn, + args={}, + name='estimator_layer', + ): + Layer.__init__(self, name=name) + assert layer is not None + assert model_fn is not None + self.inputs = layer.outputs + logging.info("EstimatorLayer %s: %s" % (self.name, model_fn)) + logging.info("This API will be removed, please use LambdaLayer instead.") + with tf.variable_scope(name) as vs: + self.outputs = model_fn(self.inputs, **args) + variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) + self.all_params.extend(variables) diff --git a/tensorlayer/layers/merge.py b/tensorlayer/layers/merge.py new file mode 100644 index 00000000..bf13ac6f --- /dev/null +++ b/tensorlayer/layers/merge.py @@ -0,0 +1,130 @@ +# -*- coding: utf-8 -*- + +from .core import * + + +class ConcatLayer(Layer): + """A layer that concats multiple tensors according to given axis.. + + Parameters + ---------- + layers : list of :class:`Layer` + List of layers to concatenate. + concat_dim : int + The dimension to concatenate. + name : str + A unique layer name. + + Examples + ---------- + >>> sess = tf.InteractiveSession() + >>> x = tf.placeholder(tf.float32, shape=[None, 784]) + >>> inputs = tl.layers.InputLayer(x, name='input_layer') + >>> net1 = tl.layers.DenseLayer(inputs, 800, act=tf.nn.relu, name='relu1_1') + >>> net2 = tl.layers.DenseLayer(inputs, 300, act=tf.nn.relu, name='relu2_1') + >>> net = tl.layers.ConcatLayer([net1, net2], 1, name ='concat_layer') + ... InputLayer input_layer (?, 784) + ... DenseLayer relu1_1: 800, relu + ... DenseLayer relu2_1: 300, relu + ... ConcatLayer concat_layer, 1100 + >>> tl.layers.initialize_global_variables(sess) + >>> net.print_params() + ... param 0: (784, 800) (mean: 0.000021, median: -0.000020 std: 0.035525) + ... param 1: (800,) (mean: 0.000000, median: 0.000000 std: 0.000000) + ... param 2: (784, 300) (mean: 0.000000, median: -0.000048 std: 0.042947) + ... param 3: (300,) (mean: 0.000000, median: 0.000000 std: 0.000000) + ... num of params: 863500 + >>> net.print_layers() + ... layer 0: ("Relu:0", shape=(?, 800), dtype=float32) + ... layer 1: Tensor("Relu_1:0", shape=(?, 300), dtype=float32) + + """ + + def __init__( + self, + layers, + concat_dim=1, + name='concat_layer', + ): + Layer.__init__(self, name=name) + self.inputs = [] + for l in layers: + self.inputs.append(l.outputs) + try: # TF1.0 + self.outputs = tf.concat(self.inputs, concat_dim, name=name) + except: # TF0.12 + self.outputs = tf.concat(concat_dim, self.inputs, name=name) + + logging.info("ConcatLayer %s: axis: %d" % (self.name, concat_dim)) + + self.all_layers = list(layers[0].all_layers) + self.all_params = list(layers[0].all_params) + self.all_drop = dict(layers[0].all_drop) + + for i in range(1, len(layers)): + self.all_layers.extend(list(layers[i].all_layers)) + self.all_params.extend(list(layers[i].all_params)) + self.all_drop.update(dict(layers[i].all_drop)) + + self.all_layers = list_remove_repeat(self.all_layers) + self.all_params = list_remove_repeat(self.all_params) + #self.all_drop = list_remove_repeat(self.all_drop) # it is a dict + + +class ElementwiseLayer(Layer): + """A layer that combines multiple :class:`Layer` that have the same output shapes + according to an element-wise operation. + + Parameters + ---------- + layers : list of :class:`Layer` + The list of layers to combine. 
+ combine_fn : a TensorFlow element-wise combine function + e.g. AND is ``tf.minimum`` ; OR is ``tf.maximum`` ; ADD is ``tf.add`` ; MUL is ``tf.multiply`` and so on. + See `TensorFlow Math API `__ . + name : str + A unique layer name. + + Examples + -------- + AND Logic + + >>> net_0 = tl.layers.DenseLayer(net_0, n_units=500, + ... act = tf.nn.relu, name='net_0') + >>> net_1 = tl.layers.DenseLayer(net_1, n_units=500, + ... act = tf.nn.relu, name='net_1') + >>> net_com = tl.layers.ElementwiseLayer(layer = [net_0, net_1], + ... combine_fn = tf.minimum, + ... name = 'combine_layer') + + """ + + def __init__( + self, + layers, + combine_fn=tf.minimum, + name='elementwise_layer', + ): + Layer.__init__(self, name=name) + + logging.info("ElementwiseLayer %s: size:%s fn:%s" % (self.name, layers[0].outputs.get_shape(), combine_fn.__name__)) + + self.outputs = layers[0].outputs + # logging.info(self.outputs._shape, type(self.outputs._shape)) + for l in layers[1:]: + assert str(self.outputs.get_shape()) == str( + l.outputs.get_shape()), "Hint: the input shapes should be the same. %s != %s" % (self.outputs.get_shape(), str(l.outputs.get_shape())) + self.outputs = combine_fn(self.outputs, l.outputs, name=name) + + self.all_layers = list(layers[0].all_layers) + self.all_params = list(layers[0].all_params) + self.all_drop = dict(layers[0].all_drop) + + for i in range(1, len(layers)): + self.all_layers.extend(list(layers[i].all_layers)) + self.all_params.extend(list(layers[i].all_params)) + self.all_drop.update(dict(layers[i].all_drop)) + + self.all_layers = list_remove_repeat(self.all_layers) + self.all_params = list_remove_repeat(self.all_params) + # self.all_drop = list_remove_repeat(self.all_drop) diff --git a/tensorlayer/layers/normalization.py b/tensorlayer/layers/normalization.py new file mode 100644 index 00000000..0c0594d7 --- /dev/null +++ b/tensorlayer/layers/normalization.py @@ -0,0 +1,267 @@ +# -*- coding: utf-8 -*- + +from .core import * + + +class LocalResponseNormLayer(Layer): + """The :class:`LocalResponseNormLayer` layer is for Local Response Normalization. + See ``tf.nn.local_response_normalization`` or ``tf.nn.lrn`` for new TF version. + The 4-D input tensor is a 3-D array of 1-D vectors (along the last dimension), and each vector is normalized independently. + Within a given vector, each component is divided by the weighted square-sum of inputs within depth_radius. + + Parameters + ----------- + layer : :class:`Layer` + The previous layer with a 4D output shape. + depth_radius : int + Depth radius. 0-D. Half-width of the 1-D normalization window. + bias : float + An offset which is usually positive and shall avoid dividing by 0. + alpha : float + A scale factor which is usually positive. + beta : float + An exponent. + name : str + A unique layer name. 
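+
+    Examples
+    --------
+    A minimal sketch, assuming ``net`` holds a 4D convolution output; the
+    hyper-parameters below roughly follow the AlexNet paper:
+
+    >>> net = tl.layers.LocalResponseNormLayer(net, depth_radius=2, bias=2.0,
+    ...         alpha=0.0001, beta=0.75, name='lrn1')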
+ + """ + + def __init__( + self, + layer, + depth_radius=None, + bias=None, + alpha=None, + beta=None, + name='lrn_layer', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + logging.info("LocalResponseNormLayer %s: depth_radius: %d, bias: %f, alpha: %f, beta: %f" % (self.name, depth_radius, bias, alpha, beta)) + with tf.variable_scope(name): + self.outputs = tf.nn.lrn(self.inputs, depth_radius=depth_radius, bias=bias, alpha=alpha, beta=beta) + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) + + +class BatchNormLayer(Layer): + """ + The :class:`BatchNormLayer` is a batch normalization layer for both fully-connected and convolution outputs. + See ``tf.nn.batch_normalization`` and ``tf.nn.moments``. + + Parameters + ---------- + layer : :class:`Layer` + The previous layer. + decay : float + A decay factor for `ExponentialMovingAverage`. + Suggest to use a large value for large dataset. + epsilon : float + Eplison. + act : activation function + The activation function of this layer. + is_train : boolean + Is being used for training or inference. + beta_init : initializer + The initializer for initializing beta. + gamma_init : initializer + The initializer for initializing gamma. + dtype : TensorFlow dtype + tf.float32 (default) or tf.float16. + name : str + A unique layer name. + + References + ---------- + - `Source `__ + - `stackoverflow `__ + + """ + + def __init__( + self, + layer, + decay=0.9, + epsilon=0.00001, + act=tf.identity, + is_train=False, + beta_init=tf.zeros_initializer, + gamma_init=tf.random_normal_initializer(mean=1.0, stddev=0.002), + name='batchnorm_layer', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + logging.info("BatchNormLayer %s: decay:%f epsilon:%f act:%s is_train:%s" % (self.name, decay, epsilon, act.__name__, is_train)) + x_shape = self.inputs.get_shape() + params_shape = x_shape[-1:] + + from tensorflow.python.training import moving_averages + from tensorflow.python.ops import control_flow_ops + + with tf.variable_scope(name) as vs: + axis = list(range(len(x_shape) - 1)) + + # 1. beta, gamma + if tf.__version__ > '0.12.1' and beta_init == tf.zeros_initializer: + beta_init = beta_init() + beta = tf.get_variable('beta', shape=params_shape, initializer=beta_init, dtype=D_TYPE, trainable=is_train) + + gamma = tf.get_variable( + 'gamma', + shape=params_shape, + initializer=gamma_init, + dtype=D_TYPE, + trainable=is_train, + ) + + # 2. + if tf.__version__ > '0.12.1': + moving_mean_init = tf.zeros_initializer() + else: + moving_mean_init = tf.zeros_initializer + moving_mean = tf.get_variable('moving_mean', params_shape, initializer=moving_mean_init, dtype=D_TYPE, trainable=False) + moving_variance = tf.get_variable( + 'moving_variance', + params_shape, + initializer=tf.constant_initializer(1.), + dtype=D_TYPE, + trainable=False, + ) + + # 3. + # These ops will only be preformed when training. 
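+            # assign_moving_average(v, x, decay) implements
+            #     v := v * decay + x * (1 - decay)
+            # so a larger decay gives smoother, slower-adapting moving statistics.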
+ mean, variance = tf.nn.moments(self.inputs, axis) + try: # TF12 + update_moving_mean = moving_averages.assign_moving_average(moving_mean, mean, decay, zero_debias=False) # if zero_debias=True, has bias + update_moving_variance = moving_averages.assign_moving_average( + moving_variance, variance, decay, zero_debias=False) # if zero_debias=True, has bias + # logging.info("TF12 moving") + except Exception as e: # TF11 + update_moving_mean = moving_averages.assign_moving_average(moving_mean, mean, decay) + update_moving_variance = moving_averages.assign_moving_average(moving_variance, variance, decay) + # logging.info("TF11 moving") + + def mean_var_with_update(): + with tf.control_dependencies([update_moving_mean, update_moving_variance]): + return tf.identity(mean), tf.identity(variance) + + if is_train: + mean, var = mean_var_with_update() + self.outputs = act(tf.nn.batch_normalization(self.inputs, mean, var, beta, gamma, epsilon)) + else: + self.outputs = act(tf.nn.batch_normalization(self.inputs, moving_mean, moving_variance, beta, gamma, epsilon)) + + variables = [beta, gamma, moving_mean, moving_variance] + + # logging.info(len(variables)) + # for idx, v in enumerate(variables): + # logging.info(" var {:3}: {:15} {}".format(idx, str(v.get_shape()), v)) + # exit() + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) + self.all_params.extend(variables) + + +class InstanceNormLayer(Layer): + """The :class:`InstanceNormLayer` class is a for instance normalization. + + Parameters + ----------- + layer : :class:`Layer` + The previous layer. + act : activation function. + The activation function of this layer. + epsilon : float + Eplison. + name : str + A unique layer name + + """ + + def __init__( + self, + layer, + act=tf.identity, + epsilon=1e-5, + name='instan_norm', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + logging.info("InstanceNormLayer %s: epsilon:%f act:%s" % (self.name, epsilon, act.__name__)) + + with tf.variable_scope(name) as vs: + mean, var = tf.nn.moments(self.inputs, [1, 2], keep_dims=True) + scale = tf.get_variable('scale', [self.inputs.get_shape()[-1]], initializer=tf.truncated_normal_initializer(mean=1.0, stddev=0.02), dtype=D_TYPE) + offset = tf.get_variable('offset', [self.inputs.get_shape()[-1]], initializer=tf.constant_initializer(0.0), dtype=D_TYPE) + self.outputs = scale * tf.div(self.inputs - mean, tf.sqrt(var + epsilon)) + offset + self.outputs = act(self.outputs) + variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) + self.all_params.extend(variables) + + +class LayerNormLayer(Layer): + """ + The :class:`LayerNormLayer` class is for layer normalization, see `tf.contrib.layers.layer_norm `__. + + Parameters + ---------- + layer : :class:`Layer` + The previous layer. + act : activation function + The activation function of this layer. + others : _ + `tf.contrib.layers.layer_norm `__. 
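+
+    Examples
+    --------
+    A minimal sketch, assuming a 2D placeholder ``x``:
+
+    >>> net = tl.layers.InputLayer(x, name='input')
+    >>> net = tl.layers.DenseLayer(net, n_units=800, name='dense')
+    >>> net = tl.layers.LayerNormLayer(net, act=tf.nn.relu, name='layernorm')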
+ + """ + + def __init__(self, + layer, + center=True, + scale=True, + act=tf.identity, + reuse=None, + variables_collections=None, + outputs_collections=None, + trainable=True, + begin_norm_axis=1, + begin_params_axis=-1, + name='layernorm'): + + if tf.__version__ < "1.3": + raise Exception("Please use TF 1.3+") + + Layer.__init__(self, name=name) + self.inputs = layer.outputs + logging.info("LayerNormLayer %s: act:%s" % (self.name, act.__name__)) + with tf.variable_scope(name) as vs: + self.outputs = tf.contrib.layers.layer_norm( + self.inputs, + center=center, + scale=scale, + activation_fn=act, + reuse=reuse, + variables_collections=variables_collections, + outputs_collections=outputs_collections, + trainable=trainable, + begin_norm_axis=begin_norm_axis, + begin_params_axis=begin_params_axis, + scope='var', + ) + variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) + self.all_params.extend(variables) diff --git a/tensorlayer/layers/object_detection.py b/tensorlayer/layers/object_detection.py new file mode 100644 index 00000000..88d58f20 --- /dev/null +++ b/tensorlayer/layers/object_detection.py @@ -0,0 +1,51 @@ +# -*- coding: utf-8 -*- + +from .core import * + + +class ROIPoolingLayer(Layer): + """ + The region of interest pooling layer. + + Parameters + ----------- + layer : :class:`Layer` + The previous layer. + rois : tuple of int + Regions of interest in the format of (feature map index, upper left, bottom right). + pool_width : int + The size of the pooling sections. + pool_width : int + The size of the pooling sections. + name : str + A unique layer name. + + Notes + ----------- + - This implementation is imported from `Deepsense-AI `__ . + - Please install it by the instruction `HERE `__. + + """ + + def __init__( + self, + layer, + rois, + pool_height=2, + pool_width=2, + name='roipooling_layer', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + logging.info("ROIPoolingLayer %s: (%d, %d)" % (self.name, pool_height, pool_width)) + try: + from tensorlayer.third_party.roi_pooling.roi_pooling.roi_pooling_ops import roi_pooling + except Exception as e: + logging.info(e) + logging.info("HINT: 1. https://github.com/deepsense-ai/roi-pooling 2. tensorlayer/third_party/roi_pooling") + self.outputs = roi_pooling(self.inputs, rois, pool_height, pool_width) + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) diff --git a/tensorlayer/layers/padding.py b/tensorlayer/layers/padding.py new file mode 100644 index 00000000..54a8561d --- /dev/null +++ b/tensorlayer/layers/padding.py @@ -0,0 +1,41 @@ +# -*- coding: utf-8 -*- + +from .core import * + + +class PadLayer(Layer): + """ + The :class:`PadLayer` class is a padding layer for any mode and dimension. + Please see `tf.pad `__ for usage. + + Parameters + ---------- + layer : :class:`Layer` + The previous layer. + paddings : Tensor + The int32 values to pad. + mode : str + "CONSTANT", "REFLECT", or "SYMMETRIC" (case-insensitive). + name : str + A unique layer name. + + """ + + def __init__( + self, + layer, + paddings, + mode='CONSTANT', + name='pad_layer', + ): + Layer.__init__(self, name=name) + assert paddings is not None, "paddings should be a Tensor of type int32. 
see https://www.tensorflow.org/api_docs/python/tf/pad" + self.inputs = layer.outputs + logging.info("PadLayer %s: paddings:%s mode:%s" % (self.name, list(paddings), mode)) + + self.outputs = tf.pad(self.inputs, paddings=paddings, mode=mode, name=name) + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) diff --git a/tensorlayer/layers/pooling.py b/tensorlayer/layers/pooling.py new file mode 100644 index 00000000..b0cecc54 --- /dev/null +++ b/tensorlayer/layers/pooling.py @@ -0,0 +1,267 @@ +# -*- coding: utf-8 -*- + +import copy + +from .core import * + + +class PoolLayer(Layer): + """ + The :class:`PoolLayer` class is a Pooling layer. + You can choose ``tf.nn.max_pool`` and ``tf.nn.avg_pool`` for 2D input or + ``tf.nn.max_pool3d`` and ``tf.nn.avg_pool3d`` for 3D input. + + Parameters + ---------- + layer : :class:`Layer` + The previous layer. + ksize : tuple of int + The size of the window for each dimension of the input tensor. + Note that: len(ksize) >= 4. + strides : tuple of int + The stride of the sliding window for each dimension of the input tensor. + Note that: len(strides) >= 4. + padding : str + The padding algorithm type: "SAME" or "VALID". + pool : pooling function + One of ``tf.nn.max_pool``, ``tf.nn.avg_pool``, ``tf.nn.max_pool3d`` and ``f.nn.avg_pool3d``. + See `TensorFlow pooling APIs `__ + name : str + A unique layer name. + + Examples + -------- + - see :class:`Conv2dLayer`. + + """ + + def __init__( + self, + layer=None, + ksize=(1, 2, 2, 1), + strides=(1, 2, 2, 1), + padding='SAME', + pool=tf.nn.max_pool, + name='pool_layer', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + logging.info("PoolLayer %s: ksize:%s strides:%s padding:%s pool:%s" % (self.name, str(ksize), str(strides), padding, pool.__name__)) + + self.outputs = pool(self.inputs, ksize=ksize, strides=strides, padding=padding, name=name) + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) + + +def maxpool1d(net, filter_size=3, strides=2, padding='valid', data_format='channels_last', name=None): + """Wrapper for `tf.layers.max_pooling1d `__ . + + Parameters + ---------- + net : :class:`Layer` + The previous layer with a output rank as 3. + filter_size : tuple of int + Pooling window size. + strides : tuple of int + Strides of the pooling operation. + padding : str + The padding method: 'valid' or 'same'. + data_format : str + One of `channels_last` (default) or `channels_first`. + The ordering of the dimensions must match the inputs. + channels_last corresponds to inputs with the shape (batch, length, channels); + while channels_first corresponds to inputs with shape (batch, channels, length). + name : str + A unique layer name. + + Returns + ------- + :class:`Layer` + A max pooling 1-D layer with a output rank as 3. + + """ + logging.info("MaxPool1d %s: filter_size:%s strides:%s padding:%s" % (name, str(filter_size), str(strides), str(padding))) + outputs = tf.layers.max_pooling1d(net.outputs, filter_size, strides, padding=padding, data_format=data_format, name=name) + + net_new = copy.copy(net) + net_new.outputs = outputs + net_new.all_layers.extend([outputs]) + return net_new + + +def meanpool1d(net, filter_size=3, strides=2, padding='valid', data_format='channels_last', name=None): + """Wrapper for `tf.layers.average_pooling1d `__ . 
+ + Parameters + ------------ + net : :class:`Layer` + The previous layer with a output rank as 3. + filter_size : tuple of int + Pooling window size. + strides : tuple of int + Strides of the pooling operation. + padding : str + The padding method: 'valid' or 'same'. + data_format : str + One of `channels_last` (default) or `channels_first`. + The ordering of the dimensions must match the inputs. + channels_last corresponds to inputs with the shape (batch, length, channels); + while channels_first corresponds to inputs with shape (batch, channels, length). + name : str + A unique layer name. + + Returns + ------- + :class:`Layer` + A mean pooling 1-D layer with a output rank as 3. + + """ + logging.info("MeanPool1d %s: filter_size:%s strides:%s padding:%s" % (name, str(filter_size), str(strides), str(padding))) + outputs = tf.layers.average_pooling1d(net.outputs, filter_size, strides, padding=padding, data_format=data_format, name=name) + + net_new = copy.copy(net) + net_new.outputs = outputs + net_new.all_layers.extend([outputs]) + return net_new + + +def maxpool2d(net, filter_size=(3, 3), strides=(2, 2), padding='SAME', name='maxpool'): + """Wrapper for :class:`PoolLayer`. + + Parameters + ----------- + net : :class:`Layer` + The previous layer with a output rank as 4. + filter_size : tuple of int + (height, width) for filter size. + strides : tuple of int + (height, width) for strides. + padding : str + The padding method: 'valid' or 'same'. + name : str + A unique layer name. + + Returns + ------- + :class:`Layer` + A max pooling 2-D layer with a output rank as 4. + + """ + if strides is None: + strides = filter_size + assert len(strides) == 2, "len(strides) should be 2, MaxPool2d and PoolLayer are different." + net = PoolLayer(net, ksize=[1, filter_size[0], filter_size[1], 1], strides=[1, strides[0], strides[1], 1], padding=padding, pool=tf.nn.max_pool, name=name) + return net + + +def meanpool2d(net, filter_size=(3, 3), strides=(2, 2), padding='SAME', name='meanpool'): + """Wrapper for :class:`PoolLayer`. + + Parameters + ----------- + net : :class:`Layer` + The previous layer with a output rank as 4. + filter_size : tuple of int + (height, width) for filter size. + strides : tuple of int + (height, width) for strides. + padding : str + The padding method: 'valid' or 'same'. + name : str + A unique layer name. + + Returns + ------- + :class:`Layer` + A mean pooling 2-D layer with a output rank as 4. + + """ + if strides is None: + strides = filter_size + assert len(strides) == 2, "len(strides) should be 2, MeanPool2d and PoolLayer are different." + net = PoolLayer(net, ksize=[1, filter_size[0], filter_size[1], 1], strides=[1, strides[0], strides[1], 1], padding=padding, pool=tf.nn.avg_pool, name=name) + return net + + +def maxpool3d(net, filter_size=(3, 3, 3), strides=(2, 2, 2), padding='valid', data_format='channels_last', name='maxpool3d'): + """Wrapper for `tf.layers.max_pooling3d `__ . + + Parameters + ------------ + net : :class:`Layer` + The previous layer with a output rank as 5. + filter_size : tuple of int + Pooling window size. + strides : tuple of int + Strides of the pooling operation. + padding : str + The padding method: 'valid' or 'same'. + data_format : str + One of `channels_last` (default) or `channels_first`. + The ordering of the dimensions must match the inputs. + channels_last corresponds to inputs with the shape (batch, length, channels); + while channels_first corresponds to inputs with shape (batch, channels, length). + name : str + A unique layer name. 
+ + Returns + ------- + :class:`Layer` + A max pooling 3-D layer with a output rank as 5. + + """ + logging.info("MaxPool3d %s: filter_size:%s strides:%s padding:%s" % (name, str(filter_size), str(strides), str(padding))) + outputs = tf.layers.max_pooling3d(net.outputs, filter_size, strides, padding=padding, data_format=data_format, name=name) + + net_new = copy.copy(net) + net_new.outputs = outputs + net_new.all_layers.extend([outputs]) + return net_new + + +def meanpool3d(net, filter_size=(3, 3, 3), strides=(2, 2, 2), padding='valid', data_format='channels_last', name='meanpool3d'): + """Wrapper for `tf.layers.average_pooling3d `__ + + Parameters + ------------ + net : :class:`Layer` + The previous layer with a output rank as 5. + filter_size : tuple of int + Pooling window size. + strides : tuple of int + Strides of the pooling operation. + padding : str + The padding method: 'valid' or 'same'. + data_format : str + One of `channels_last` (default) or `channels_first`. + The ordering of the dimensions must match the inputs. + channels_last corresponds to inputs with the shape (batch, length, channels); + while channels_first corresponds to inputs with shape (batch, channels, length). + name : str + A unique layer name. + + Returns + ------- + :class:`Layer` + A mean pooling 3-D layer with a output rank as 5. + + """ + logging.info("MeanPool3d %s: filter_size:%s strides:%s padding:%s" % (name, str(filter_size), str(strides), str(padding))) + outputs = tf.layers.average_pooling3d(net.outputs, filter_size, strides, padding=padding, data_format=data_format, name=name) + + net_new = copy.copy(net) + net_new.outputs = outputs + net_new.all_layers.extend([outputs]) + return net_new + + +# Alias +MaxPool1d = maxpool1d +MaxPool2d = maxpool2d +MaxPool3d = maxpool3d +MeanPool1d = meanpool1d +MeanPool2d = meanpool2d +MeanPool3d = meanpool3d diff --git a/tensorlayer/layers/recurrent.py b/tensorlayer/layers/recurrent.py new file mode 100644 index 00000000..0b0ca715 --- /dev/null +++ b/tensorlayer/layers/recurrent.py @@ -0,0 +1,1601 @@ +# -*- coding: utf-8 -*- + +import inspect + +from .core import * + + +class RNNLayer(Layer): + """ + The :class:`RNNLayer` class is a fixed length recurrent layer for implementing vanilla RNN, + LSTM, GRU and etc. + + Parameters + ---------- + layer : :class:`Layer` + Previous layer. + cell_fn : TensorFlow cell function + A TensorFlow core RNN cell + - See `RNN Cells in TensorFlow `__ + - Note TF1.0+ and TF1.0- are different + cell_init_args : dictionary + The arguments for the cell function. + n_hidden : int + The number of hidden units in the layer. + initializer : initializer + The initializer for initializing the model parameters. + n_steps : int + The fixed sequence length. + initial_state : None or RNN State + If None, `initial_state` is zero state. + return_last : boolean + Whether return last output or all outputs in each step. + - If True, return the last output, "Sequence input and single output" + - If False, return all outputs, "Synced sequence input and output" + - In other word, if you want to stack more RNNs on this layer, set to False. + return_seq_2d : boolean + Only consider this argument when `return_last` is `False` + - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it. + - If False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it. + name : str + A unique layer name. + + Attributes + ---------- + outputs : Tensor + The output of this layer. 
+ + final_state : Tensor or StateTuple + The finial state of this layer. + - When `state_is_tuple` is `False`, it is the final hidden and cell states, `states.get_shape() = [?, 2 * n_hidden]`. + - When `state_is_tuple` is `True`, it stores two elements: `(c, h)`. + - In practice, you can get the final state after each iteration during training, then feed it to the initial state of next iteration. + + initial_state : Tensor or StateTuple + The initial state of this layer. + - In practice, you can set your state at the begining of each epoch or iteration according to your training procedure. + + batch_size : int or Tensor + It is an integer, if it is able to compute the `batch_size`; otherwise, tensor for dynamic batch size. + + Examples + -------- + For language modeling, see `PTB example `__ + + >>> input_data = tf.placeholder(tf.int32, [batch_size, num_steps]) + >>> net = tl.layers.EmbeddingInputlayer( + ... inputs = input_data, + ... vocabulary_size = vocab_size, + ... embedding_size = hidden_size, + ... E_init = tf.random_uniform_initializer(-init_scale, init_scale), + ... name ='embedding_layer') + >>> net = tl.layers.DropoutLayer(net, keep=keep_prob, is_fix=True, is_train=is_train, name='drop1') + >>> net = tl.layers.RNNLayer(net, + ... cell_fn=tf.contrib.rnn.BasicLSTMCell, + ... cell_init_args={'forget_bias': 0.0},# 'state_is_tuple': True}, + ... n_hidden=hidden_size, + ... initializer=tf.random_uniform_initializer(-init_scale, init_scale), + ... n_steps=num_steps, + ... return_last=False, + ... name='basic_lstm_layer1') + >>> lstm1 = net + >>> net = tl.layers.DropoutLayer(net, keep=keep_prob, is_fix=True, is_train=is_train, name='drop2') + >>> net = tl.layers.RNNLayer(net, + ... cell_fn=tf.contrib.rnn.BasicLSTMCell, + ... cell_init_args={'forget_bias': 0.0}, # 'state_is_tuple': True}, + ... n_hidden=hidden_size, + ... initializer=tf.random_uniform_initializer(-init_scale, init_scale), + ... n_steps=num_steps, + ... return_last=False, + ... return_seq_2d=True, + ... name='basic_lstm_layer2') + >>> lstm2 = net + >>> net = tl.layers.DropoutLayer(net, keep=keep_prob, is_fix=True, is_train=is_train, name='drop3') + >>> net = tl.layers.DenseLayer(net, n_units=vocab_size, name='output') + + For CNN+LSTM + + >>> x = tf.placeholder(tf.float32, shape=[batch_size, image_size, image_size, 1]) + >>> net = tl.layers.InputLayer(x, name='input_layer') + >>> net = tl.layers.Conv2d(net, 32, (5, 5), (2, 2), tf.nn.relu, name='cnn1') + >>> net = tl.layers.MaxPool2d(net, (2, 2), (2, 2), name='pool1') + >>> net = tl.layers.Conv2d(net, 10, (5, 5), (2, 2), tf.nn.relu, name='cnn2') + >>> net = tl.layers.MaxPool2d(net, (2, 2), (2, 2), name='pool2') + >>> net = tl.layers.FlattenLayer(net, name='flatten_layer') + >>> net = tl.layers.ReshapeLayer(net, shape=[-1, num_steps, int(net.outputs._shape[-1])]) + >>> rnn1 = tl.layers.RNNLayer(net, + ... cell_fn=tf.nn.rnn_cell.LSTMCell, + ... n_hidden=200, + ... n_steps=num_steps, + ... return_last=False, + ... return_seq_2d=True, + ... name='rnn') + >>> net = tl.layers.DenseLayer(rnn1, 3, name='output') + + Notes + ----- + Input dimension should be rank 3 : [batch_size, n_steps, n_features], if no, please see :class:`ReshapeLayer`. 
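+
+    For instance, a batch of sequences of scalars must be expanded from
+    [batch_size, n_steps] to [batch_size, n_steps, 1] first, e.g.:
+
+    >>> net = tl.layers.ReshapeLayer(net, shape=(-1, n_steps, 1), name='reshape')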
+ + References + ---------- + - `Neural Network RNN Cells in TensorFlow `__ + - `tensorflow/python/ops/rnn.py `__ + - `tensorflow/python/ops/rnn_cell.py `__ + - see TensorFlow tutorial ``ptb_word_lm.py``, TensorLayer tutorials ``tutorial_ptb_lstm*.py`` and ``tutorial_generate_text.py`` + + """ + + def __init__( + self, + layer, + cell_fn, + cell_init_args={}, + n_hidden=100, + initializer=tf.random_uniform_initializer(-0.1, 0.1), + n_steps=5, + initial_state=None, + return_last=False, + return_seq_2d=False, + name='rnn_layer', + ): + Layer.__init__(self, name=name) + if cell_fn is None: + raise Exception("Please put in cell_fn") + if 'GRU' in cell_fn.__name__: + try: + cell_init_args.pop('state_is_tuple') + except: + pass + + self.inputs = layer.outputs + + logging.info("RNNLayer %s: n_hidden:%d n_steps:%d in_dim:%d in_shape:%s cell_fn:%s " % (self.name, n_hidden, n_steps, self.inputs.get_shape().ndims, + self.inputs.get_shape(), cell_fn.__name__)) + # You can get the dimension by .get_shape() or ._shape, and check the + # dimension by .with_rank() as follow. + # self.inputs.get_shape().with_rank(2) + # self.inputs.get_shape().with_rank(3) + + # Input dimension should be rank 3 [batch_size, n_steps(max), n_features] + try: + self.inputs.get_shape().with_rank(3) + except: + raise Exception("RNN : Input dimension should be rank 3 : [batch_size, n_steps, n_features]") + + # is_reshape : boolean (deprecate) + # Reshape the inputs to 3 dimension tensor.\n + # If input is[batch_size, n_steps, n_features], we do not need to reshape it.\n + # If input is [batch_size * n_steps, n_features], we need to reshape it. + # if is_reshape: + # self.inputs = tf.reshape(self.inputs, shape=[-1, n_steps, int(self.inputs._shape[-1])]) + + fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0] + + if fixed_batch_size.value: + batch_size = fixed_batch_size.value + logging.info(" RNN batch_size (concurrent processes): %d" % batch_size) + else: + from tensorflow.python.ops import array_ops + batch_size = array_ops.shape(self.inputs)[0] + logging.info(" non specified batch_size, uses a tensor instead.") + self.batch_size = batch_size + + # Simplified version of tensorflow.models.rnn.rnn.py's rnn(). + # This builds an unrolled LSTM for tutorial purposes only. + # In general, use the rnn() or state_saving_rnn() from rnn.py. + # + # The alternative version of the code below is: + # + # from tensorflow.models.rnn import rnn + # inputs = [tf.squeeze(input_, [1]) + # for input_ in tf.split(1, num_steps, inputs)] + # outputs, state = rnn.rnn(cell, inputs, initial_state=self._initial_state) + outputs = [] + if 'reuse' in inspect.getargspec(cell_fn.__init__).args: + self.cell = cell = cell_fn(num_units=n_hidden, reuse=tf.get_variable_scope().reuse, **cell_init_args) + else: + self.cell = cell = cell_fn(num_units=n_hidden, **cell_init_args) + if initial_state is None: + self.initial_state = cell.zero_state(batch_size, dtype=D_TYPE) #dtype=tf.float32) # 1.2.3 + state = self.initial_state + # with tf.variable_scope("model", reuse=None, initializer=initializer): + with tf.variable_scope(name, initializer=initializer) as vs: + for time_step in range(n_steps): + if time_step > 0: tf.get_variable_scope().reuse_variables() + (cell_output, state) = cell(self.inputs[:, time_step, :], state) + outputs.append(cell_output) + + # Retrieve just the RNN variables. 
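+            # (TF_GRAPHKEYS_VARIABLES is tf.GraphKeys.GLOBAL_VARIABLES on TF1.0+,
+            # so this collects every variable created under this layer's scope.)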
+ # rnn_variables = [v for v in tf.all_variables() if v.name.startswith(vs.name)] + rnn_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) + + logging.info(" n_params : %d" % (len(rnn_variables))) + + if return_last: + # 2D Tensor [batch_size, n_hidden] + self.outputs = outputs[-1] + else: + if return_seq_2d: + # PTB tutorial: stack dense layer after that, or compute the cost from the output + # 2D Tensor [n_example, n_hidden] + try: # TF1.0 + self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_hidden]) + except: # TF0.12 + self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_hidden]) + + else: + # : stack more RNN layer after that + # 3D Tensor [n_example/n_steps, n_steps, n_hidden] + try: # TF1.0 + self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_steps, n_hidden]) + except: # TF0.12 + self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_steps, n_hidden]) + + self.final_state = state + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + # logging.info(type(self.outputs)) + self.all_layers.extend([self.outputs]) + self.all_params.extend(rnn_variables) + + +class BiRNNLayer(Layer): + """ + The :class:`BiRNNLayer` class is a fixed length Bidirectional recurrent layer. + + Parameters + ---------- + layer : :class:`Layer` + Previous layer. + cell_fn : TensorFlow cell function + A TensorFlow core RNN cell. + - See `RNN Cells in TensorFlow `__. + - Note TF1.0+ and TF1.0- are different. + cell_init_args : dictionary + The arguments for the cell function. + n_hidden : int + The number of hidden units in the layer. + initializer : initializer + The initializer for initializing the model parameters. + n_steps : int + The fixed sequence length. + fw_initial_state : None or forward RNN State + If None, `initial_state` is zero state. + bw_initial_state : None or backward RNN State + If None, `initial_state` is zero state. + dropout : tuple of float or int + The input and output keep probability (input_keep_prob, output_keep_prob). + If one int, input and output keep probability are the same. + n_layer : int + The number of RNN layers, default is 1. + return_last : boolean + Whether return last output or all outputs in each step. + - If True, return the last output, "Sequence input and single output" + - If False, return all outputs, "Synced sequence input and output" + - In other word, if you want to stack more RNNs on this layer, set to False. + return_seq_2d : boolean + Only consider this argument when `return_last` is `False` + - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it. + - If False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it. + name : str + A unique layer name. + + Attributes + ---------- + outputs : tensor + The output of this layer. + fw(bw)_final_state : tensor or StateTuple + The finial state of this layer. + - When `state_is_tuple` is `False`, it is the final hidden and cell states, `states.get_shape() = [?, 2 * n_hidden]`. + - When `state_is_tuple` is `True`, it stores two elements: `(c, h)`. + - In practice, you can get the final state after each iteration during training, then feed it to the initial state of next iteration. + fw(bw)_initial_state : tensor or StateTuple + The initial state of this layer. + - In practice, you can set your state at the begining of each epoch or iteration according to your training procedure. 
+ batch_size : int or tensor + It is an integer, if it is able to compute the `batch_size`; otherwise, tensor for dynamic batch size. + + Notes + ----- + Input dimension should be rank 3 : [batch_size, n_steps, n_features]. If not, please see :class:`ReshapeLayer`. + For predicting, the sequence length has to be the same with the sequence length of training, while, for normal + RNN, we can use sequence length of 1 for predicting. + + References + ---------- + `Source `__ + + """ + + def __init__( + self, + layer, + cell_fn, + cell_init_args={ + 'use_peepholes': True, + 'state_is_tuple': True + }, + n_hidden=100, + initializer=tf.random_uniform_initializer(-0.1, 0.1), + n_steps=5, + fw_initial_state=None, + bw_initial_state=None, + dropout=None, + n_layer=1, + return_last=False, + return_seq_2d=False, + name='birnn_layer', + ): + Layer.__init__(self, name=name) + if cell_fn is None: + raise Exception("Please put in cell_fn") + if 'GRU' in cell_fn.__name__: + try: + cell_init_args.pop('state_is_tuple') + except: + pass + + self.inputs = layer.outputs + + logging.info("BiRNNLayer %s: n_hidden:%d n_steps:%d in_dim:%d in_shape:%s cell_fn:%s dropout:%s n_layer:%d " % (self.name, n_hidden, n_steps, + self.inputs.get_shape().ndims, + self.inputs.get_shape(), + cell_fn.__name__, dropout, n_layer)) + + fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0] + + if fixed_batch_size.value: + self.batch_size = fixed_batch_size.value + logging.info(" RNN batch_size (concurrent processes): %d" % self.batch_size) + else: + from tensorflow.python.ops import array_ops + self.batch_size = array_ops.shape(self.inputs)[0] + logging.info(" non specified batch_size, uses a tensor instead.") + + # Input dimension should be rank 3 [batch_size, n_steps(max), n_features] + try: + self.inputs.get_shape().with_rank(3) + except: + raise Exception("RNN : Input dimension should be rank 3 : [batch_size, n_steps, n_features]") + + with tf.variable_scope(name, initializer=initializer) as vs: + rnn_creator = lambda: cell_fn(num_units=n_hidden, **cell_init_args) + # Apply dropout + if dropout: + if type(dropout) in [tuple, list]: + in_keep_prob = dropout[0] + out_keep_prob = dropout[1] + elif isinstance(dropout, float): + in_keep_prob, out_keep_prob = dropout, dropout + else: + raise Exception("Invalid dropout type (must be a 2-D tuple of " "float)") + try: # TF 1.0 + DropoutWrapper_fn = tf.contrib.rnn.DropoutWrapper + except: + DropoutWrapper_fn = tf.nn.rnn_cell.DropoutWrapper + cell_creator = lambda: DropoutWrapper_fn(rnn_creator(), input_keep_prob=in_keep_prob, output_keep_prob=1.0) # out_keep_prob) + else: + cell_creator = rnn_creator + self.fw_cell = cell_creator() + self.bw_cell = cell_creator() + + # Apply multiple layers + if n_layer > 1: + try: # TF1.0 + MultiRNNCell_fn = tf.contrib.rnn.MultiRNNCell + except: + MultiRNNCell_fn = tf.nn.rnn_cell.MultiRNNCell + + try: + self.fw_cell = MultiRNNCell_fn([cell_creator() for _ in range(n_layer)], state_is_tuple=True) + self.bw_cell = MultiRNNCell_fn([cell_creator() for _ in range(n_layer)], state_is_tuple=True) + except: + self.fw_cell = MultiRNNCell_fn([cell_creator() for _ in range(n_layer)]) + self.bw_cell = MultiRNNCell_fn([cell_creator() for _ in range(n_layer)]) + + # Initial state of RNN + if fw_initial_state is None: + self.fw_initial_state = self.fw_cell.zero_state(self.batch_size, dtype=D_TYPE) # dtype=tf.float32) + else: + self.fw_initial_state = fw_initial_state + if bw_initial_state is None: + self.bw_initial_state = 
self.bw_cell.zero_state(self.batch_size, dtype=D_TYPE) # dtype=tf.float32) + else: + self.bw_initial_state = bw_initial_state + # exit() + # Feedforward to MultiRNNCell + try: # TF1.0 + list_rnn_inputs = tf.unstack(self.inputs, axis=1) + except: # TF0.12 + list_rnn_inputs = tf.unpack(self.inputs, axis=1) + + try: # TF1.0 + bidirectional_rnn_fn = tf.contrib.rnn.static_bidirectional_rnn + except: + bidirectional_rnn_fn = tf.nn.bidirectional_rnn + outputs, fw_state, bw_state = bidirectional_rnn_fn( # outputs, fw_state, bw_state = tf.contrib.rnn.static_bidirectional_rnn( + cell_fw=self.fw_cell, + cell_bw=self.bw_cell, + inputs=list_rnn_inputs, + initial_state_fw=self.fw_initial_state, + initial_state_bw=self.bw_initial_state) + + if return_last: + raise Exception("Do not support return_last at the moment.") + self.outputs = outputs[-1] + else: + self.outputs = outputs + if return_seq_2d: + # 2D Tensor [n_example, n_hidden] + try: # TF1.0 + self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_hidden * 2]) + except: # TF0.12 + self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_hidden * 2]) + else: + # : stack more RNN layer after that + # 3D Tensor [n_example/n_steps, n_steps, n_hidden] + + try: # TF1.0 + self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_steps, n_hidden * 2]) + except: # TF0.12 + self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_steps, n_hidden * 2]) + self.fw_final_state = fw_state + self.bw_final_state = bw_state + + # Retrieve just the RNN variables. + rnn_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) + + logging.info(" n_params : %d" % (len(rnn_variables))) + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) + self.all_params.extend(rnn_variables) + + +class ConvRNNCell(object): + """Abstract object representing an Convolutional RNN Cell.""" + + def __call__(self, inputs, state, scope=None): + """Run this RNN cell on inputs, starting from the given state.""" + raise NotImplementedError("Abstract method") + + @property + def state_size(self): + """size(s) of state(s) used by this cell.""" + raise NotImplementedError("Abstract method") + + @property + def output_size(self): + """Integer or TensorShape: size of outputs produced by this cell.""" + raise NotImplementedError("Abstract method") + + def zero_state(self, batch_size): + """Return zero-filled state tensor(s). + Args: + batch_size: int, float, or unit Tensor representing the batch size. + Returns: + tensor of shape '[batch_size x shape[0] x shape[1] x num_features] + filled with zeros + + """ + shape = self.shape + num_features = self.num_features + zeros = tf.zeros([batch_size, shape[0], shape[1], num_features * 2]) + return zeros + + +class BasicConvLSTMCell(ConvRNNCell): + """Basic Conv LSTM recurrent network cell. + + Parameters + ----------- + shape : tuple of int + The height and width of the cell. + filter_size : tuple of int + The height and width of the filter + num_features : int + The hidden size of the cell + forget_bias : float + The bias added to forget gates (see above). + input_size : int + Deprecated and unused. + state_is_tuple : boolen + If True, accepted and returned states are 2-tuples of the `c_state` and `m_state`. + If False, they are concatenated along the column axis. The latter behavior will soon be deprecated. + act : activation function + The activation function of this layer, tanh as default. 
+
+    """
+
+    def __init__(self, shape, filter_size, num_features, forget_bias=1.0, input_size=None, state_is_tuple=False, act=tf.nn.tanh):
+        """Initialize the basic Conv LSTM cell."""
+        # if not state_is_tuple:
+        #     logging.warn("%s: Using a concatenated state is slower and will soon be "
+        #                  "deprecated.  Use state_is_tuple=True.", self)
+        if input_size is not None:
+            logging.warn("%s: The input_size parameter is deprecated.", self)
+        self.shape = shape
+        self.filter_size = filter_size
+        self.num_features = num_features
+        self._forget_bias = forget_bias
+        self._state_is_tuple = state_is_tuple
+        self._activation = act
+
+    @property
+    def state_size(self):
+        """State size of the LSTMStateTuple."""
+        return (LSTMStateTuple(self.num_features, self.num_features) if self._state_is_tuple else 2 * self.num_features)
+
+    @property
+    def output_size(self):
+        """Number of units in outputs."""
+        return self.num_features
+
+    def __call__(self, inputs, state, scope=None):
+        """Long short-term memory cell (LSTM)."""
+        with tf.variable_scope(scope or type(self).__name__):  # "BasicLSTMCell"
+            # Parameters of gates are concatenated into one multiply for efficiency.
+            if self._state_is_tuple:
+                c, h = state
+            else:
+                # print state
+                # c, h = tf.split(3, 2, state)
+                c, h = tf.split(state, 2, 3)
+            concat = _conv_linear([inputs, h], self.filter_size, self.num_features * 4, True)
+
+            # i = input_gate, j = new_input, f = forget_gate, o = output_gate
+            # i, j, f, o = tf.split(3, 4, concat)
+            i, j, f, o = tf.split(concat, 4, 3)
+
+            new_c = (c * tf.nn.sigmoid(f + self._forget_bias) + tf.nn.sigmoid(i) * self._activation(j))
+            new_h = self._activation(new_c) * tf.nn.sigmoid(o)
+
+            if self._state_is_tuple:
+                new_state = LSTMStateTuple(new_c, new_h)
+            else:
+                new_state = tf.concat([new_c, new_h], 3)
+            return new_h, new_state
+
+
+def _conv_linear(args, filter_size, num_features, bias, bias_start=0.0, scope=None):
+    """Convolution helper for the ConvLSTM cell: applies a 2D convolution over the (concatenated) arguments.
+
+    Parameters
+    ----------
+    args : tensor or list of tensors
+        A 4D Tensor or a list of 4D Tensors [batch, height, width, channels].
+    filter_size : tuple of int
+        Filter height and width.
+    num_features : int
+        Number of output features.
+    bias : boolean
+        Whether to add a bias term.
+    bias_start : float
+        Starting value to initialize the bias; 0 by default.
+    scope : VariableScope
+        For the created subgraph; defaults to "Conv".
+
+    Returns
+    --------
+    - A 4D Tensor with shape [batch, h, w, num_features]
+
+    Raises
+    -------
+    - ValueError : if any of the arguments has an unspecified or wrong shape.
+
+    """
+    # Calculate the total channel depth of the arguments (axis 3).
+    total_arg_size_depth = 0
+    shapes = [a.get_shape().as_list() for a in args]
+    for shape in shapes:
+        if len(shape) != 4:
+            raise ValueError("_conv_linear is expecting 4D arguments: %s" % str(shapes))
+        if not shape[3]:
+            raise ValueError("_conv_linear expects shape[3] (channels) of arguments: %s" % str(shapes))
+        else:
+            total_arg_size_depth += shape[3]
+
+    dtype = [a.dtype for a in args][0]
+
+    # Now the computation.
+    with tf.variable_scope(scope or "Conv"):
+        matrix = tf.get_variable("Matrix", [filter_size[0], filter_size[1], total_arg_size_depth, num_features], dtype=dtype)
+        if len(args) == 1:
+            res = tf.nn.conv2d(args[0], matrix, strides=[1, 1, 1, 1], padding='SAME')
+        else:
+            res = tf.nn.conv2d(tf.concat(args, 3), matrix, strides=[1, 1, 1, 1], padding='SAME')
+        if not bias:
+            return res
+        bias_term = tf.get_variable("Bias", [num_features], dtype=dtype, initializer=tf.constant_initializer(bias_start, dtype=dtype))
+        return res + bias_term
+
+
+class ConvLSTMLayer(Layer):
+    """A fixed-length Convolutional LSTM layer.
+
+    See this `paper `__ .
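+
+    The input is expected to be a rank-5 tensor [batch_size, n_steps, height, width, channels];
+    each time step is fed through a convolutional recurrent cell such as :class:`BasicConvLSTMCell`.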
+ + Parameters + ---------- + layer : :class:`Layer` + Previous layer + cell_shape : tuple of int + The shape of each cell width * height + filter_size : tuple of int + The size of filter width * height + cell_fn : a convolutional RNN cell + Cell function like :class:`BasicConvLSTMCell` + feature_map : int + The number of feature map in the layer. + initializer : initializer + The initializer for initializing the parameters. + n_steps : int + The sequence length. + initial_state : None or ConvLSTM State + If None, `initial_state` is zero state. + return_last : boolean + Whether return last output or all outputs in each step. + - If True, return the last output, "Sequence input and single output". + - If False, return all outputs, "Synced sequence input and output". + - In other word, if you want to stack more RNNs on this layer, set to False. + return_seq_2d : boolean + Only consider this argument when `return_last` is `False` + - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it. + - If False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it. + name : str + A unique layer name. + + Attributes + ---------- + outputs : tensor + The output of this RNN. return_last = False, outputs = all cell_output, which is the hidden state. + cell_output.get_shape() = (?, h, w, c]) + + final_state : tensor or StateTuple + The finial state of this layer. + - When state_is_tuple = False, it is the final hidden and cell states, + - When state_is_tuple = True, You can get the final state after each iteration during training, then feed it to the initial state of next iteration. + + initial_state : tensor or StateTuple + It is the initial state of this ConvLSTM layer, you can use it to initialize + your state at the beginning of each epoch or iteration according to your + training procedure. + + batch_size : int or tensor + Is int, if able to compute the batch_size, otherwise, tensor for ``?``. + + """ + + def __init__( + self, + layer, + cell_shape=None, + feature_map=1, + filter_size=(3, 3), + cell_fn=BasicConvLSTMCell, + initializer=tf.random_uniform_initializer(-0.1, 0.1), + n_steps=5, + initial_state=None, + return_last=False, + return_seq_2d=False, + name='convlstm_layer', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + logging.info("ConvLSTMLayer %s: feature_map:%d, n_steps:%d, " + "in_dim:%d %s, cell_fn:%s " % (self.name, feature_map, n_steps, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__)) + # You can get the dimension by .get_shape() or ._shape, and check the + # dimension by .with_rank() as follow. 
+ # self.inputs.get_shape().with_rank(2) + # self.inputs.get_shape().with_rank(3) + + # Input dimension should be rank 5 [batch_size, n_steps(max), h, w, c] + try: + self.inputs.get_shape().with_rank(5) + except: + raise Exception("RNN : Input dimension should be rank 5 : [batch_size, n_steps, input_x, " "input_y, feature_map]") + + fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0] + + if fixed_batch_size.value: + batch_size = fixed_batch_size.value + logging.info(" RNN batch_size (concurrent processes): %d" % batch_size) + else: + from tensorflow.python.ops import array_ops + batch_size = array_ops.shape(self.inputs)[0] + logging.info(" non specified batch_size, uses a tensor instead.") + self.batch_size = batch_size + + outputs = [] + self.cell = cell = cell_fn(shape=cell_shape, filter_size=filter_size, num_features=feature_map) + if initial_state is None: + self.initial_state = cell.zero_state(batch_size, dtype=D_TYPE) # dtype=tf.float32) # 1.2.3 + state = self.initial_state + # with tf.variable_scope("model", reuse=None, initializer=initializer): + with tf.variable_scope(name, initializer=initializer) as vs: + for time_step in range(n_steps): + if time_step > 0: tf.get_variable_scope().reuse_variables() + (cell_output, state) = cell(self.inputs[:, time_step, :, :, :], state) + outputs.append(cell_output) + + # Retrieve just the RNN variables. + # rnn_variables = [v for v in tf.all_variables() if v.name.startswith(vs.name)] + rnn_variables = tf.get_collection(tf.GraphKeys.VARIABLES, scope=vs.name) + + logging.info(" n_params : %d" % (len(rnn_variables))) + + if return_last: + # 2D Tensor [batch_size, n_hidden] + self.outputs = outputs[-1] + else: + if return_seq_2d: + # PTB tutorial: stack dense layer after that, or compute the cost from the output + # 4D Tensor [n_example, h, w, c] + self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, cell_shape[0] * cell_shape[1] * feature_map]) + else: + # : stack more RNN layer after that + # 5D Tensor [n_example/n_steps, n_steps, h, w, c] + self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_steps, cell_shape[0], cell_shape[1], feature_map]) + + self.final_state = state + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) + self.all_params.extend(rnn_variables) + + +# Advanced Ops for Dynamic RNN +def advanced_indexing_op(input, index): + """Advanced Indexing for Sequences, returns the outputs by given sequence lengths. + When return the last output :class:`DynamicRNNLayer` uses it to get the last outputs with the sequence lengths. + + Parameters + ----------- + input : tensor for data + With shape of [batch_size, n_step(max), n_features] + index : tensor for indexing + Sequence length in Dynamic RNN. [batch_size] + + Examples + --------- + >>> batch_size, max_length, n_features = 3, 5, 2 + >>> z = np.random.uniform(low=-1, high=1, size=[batch_size, max_length, n_features]).astype(np.float32) + >>> b_z = tf.constant(z) + >>> sl = tf.placeholder(dtype=tf.int32, shape=[batch_size]) + >>> o = advanced_indexing_op(b_z, sl) + >>> + >>> sess = tf.InteractiveSession() + >>> tl.layers.initialize_global_variables(sess) + >>> + >>> order = np.asarray([1,1,2]) + >>> print("real",z[0][order[0]-1], z[1][order[1]-1], z[2][order[2]-1]) + >>> y = sess.run([o], feed_dict={sl:order}) + >>> print("given",order) + >>> print("out", y) + ... real [-0.93021595 0.53820813] [-0.92548317 -0.77135968] [ 0.89952248 0.19149846] + ... 
given [1 1 2]
+    ... out [array([[-0.93021595,  0.53820813],
+    ...        [-0.92548317, -0.77135968],
+    ...        [ 0.89952248,  0.19149846]], dtype=float32)]
+
+    References
+    -----------
+    - Modified from TFLearn (the original code is for fixed-length RNN), `references `__.
+
+    """
+    batch_size = tf.shape(input)[0]
+    # max_length = int(input.get_shape()[1])    # for fixed-length rnn, length is given
+    max_length = tf.shape(input)[1]  # for dynamic_rnn, length is unknown
+    dim_size = int(input.get_shape()[2])
+    index = tf.range(0, batch_size) * max_length + (index - 1)
+    flat = tf.reshape(input, [-1, dim_size])
+    relevant = tf.gather(flat, index)
+    return relevant
+
+
+def retrieve_seq_length_op(data):
+    """An op to compute the length of each sequence in a batch of shape [batch_size, n_step(max), n_features];
+    it can be used when the padding features (on the right hand side) are all zeros.
+
+    Parameters
+    -----------
+    data : tensor
+        [batch_size, n_step(max), n_features] with zero padding on the right hand side.
+
+    Examples
+    ---------
+    >>> data = [[[1],[2],[0],[0],[0]],
+    ...         [[1],[2],[3],[0],[0]],
+    ...         [[1],[2],[6],[1],[0]]]
+    >>> data = np.asarray(data)
+    >>> print(data.shape)
+    ... (3, 5, 1)
+    >>> data = tf.constant(data)
+    >>> sl = retrieve_seq_length_op(data)
+    >>> sess = tf.InteractiveSession()
+    >>> tl.layers.initialize_global_variables(sess)
+    >>> y = sl.eval()
+    ... [2 3 4]
+
+    Multiple features
+    >>> data = [[[1,2],[2,2],[1,2],[1,2],[0,0]],
+    ...         [[2,3],[2,4],[3,2],[0,0],[0,0]],
+    ...         [[3,3],[2,2],[5,3],[1,2],[0,0]]]
+    >>> sl = retrieve_seq_length_op(tf.constant(np.asarray(data)))
+    >>> print(sl.eval())
+    ... [4 3 4]
+
+    References
+    ------------
+    Borrowed from `TFlearn `__.
+
+    """
+    with tf.name_scope('GetLength'):
+        # TF 1.0 changed reduction_indices to axis
+        used = tf.sign(tf.reduce_max(tf.abs(data), 2))
+        length = tf.reduce_sum(used, 1)
+        # TF < 1.0
+        # used = tf.sign(tf.reduce_max(tf.abs(data), reduction_indices=2))
+        # length = tf.reduce_sum(used, reduction_indices=1)
+        length = tf.cast(length, tf.int32)
+    return length
+
+
+def retrieve_seq_length_op2(data):
+    """An op to compute the length of each sequence in a batch of shape [batch_size, n_step(max)];
+    it can be used when the padding features (on the right hand side) are all zeros.
+
+    Parameters
+    -----------
+    data : tensor
+        [batch_size, n_step(max)] with zero padding on the right hand side.
+
+    Examples
+    --------
+    >>> data = [[1,2,0,0,0],
+    ...         [1,2,3,0,0],
+    ...         [1,2,6,1,0]]
+    >>> o = retrieve_seq_length_op2(data)
+    >>> sess = tf.InteractiveSession()
+    >>> tl.layers.initialize_global_variables(sess)
+    >>> print(o.eval())
+    ... [2 3 4]
+
+    """
+    return tf.reduce_sum(tf.cast(tf.greater(data, tf.zeros_like(data)), tf.int32), 1)
+
+
+def retrieve_seq_length_op3(data, pad_val=0):  # HangSheng: return tensor for sequence length, if input is tf.string
+    """Return a tensor for the sequence lengths; it also works when the input is ``tf.string`` (use ``pad_val`` for the padding value).
+
+    """
+    data_shape_size = data.get_shape().ndims
+    if data_shape_size == 3:
+        return tf.reduce_sum(tf.cast(tf.reduce_any(tf.not_equal(data, pad_val), axis=2), dtype=tf.int32), 1)
+    elif data_shape_size == 2:
+        return tf.reduce_sum(tf.cast(tf.not_equal(data, pad_val), dtype=tf.int32), 1)
+    elif data_shape_size == 1:
+        raise ValueError("retrieve_seq_length_op3: data has wrong shape!")
+    else:
+        raise ValueError("retrieve_seq_length_op3: handling data_shape_size %s hasn't been implemented!" % (data_shape_size))
+
+
+def target_mask_op(data, pad_val=0):  # HangSheng: return tensor for mask, if input is tf.string
+    """Return a tensor for the mask; it also works when the input is ``tf.string`` (use ``pad_val`` for the padding value).
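+
+    Examples
+    --------
+    A small sketch with zero padding on the right (outputs worked out by hand):
+
+    >>> data = [[1, 2, 0, 0],
+    ...         [1, 2, 3, 0]]
+    >>> mask = target_mask_op(tf.constant(np.asarray(data)))
+    >>> print(mask.eval())
+    ... [[1 1 0 0]
+    ...  [1 1 1 0]]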
+
+    """
+    data_shape_size = data.get_shape().ndims
+    if data_shape_size == 3:
+        return tf.cast(tf.reduce_any(tf.not_equal(data, pad_val), axis=2), dtype=tf.int32)
+    elif data_shape_size == 2:
+        return tf.cast(tf.not_equal(data, pad_val), dtype=tf.int32)
+    elif data_shape_size == 1:
+        raise ValueError("target_mask_op: data has wrong shape!")
+    else:
+        raise ValueError("target_mask_op: handling data_shape_size %s hasn't been implemented!" % (data_shape_size))
+
+
+class DynamicRNNLayer(Layer):
+    """
+    The :class:`DynamicRNNLayer` class is a dynamic recurrent layer, see ``tf.nn.dynamic_rnn``.
+
+    Parameters
+    ----------
+    layer : :class:`Layer`
+        Previous layer.
+    cell_fn : TensorFlow cell function
+        A TensorFlow core RNN cell
+            - See `RNN Cells in TensorFlow `__
+            - Note TF1.0+ and TF1.0- are different
+    cell_init_args : dictionary
+        The arguments for the cell function.
+    n_hidden : int
+        The number of hidden units in the layer.
+    initializer : initializer
+        The initializer for initializing the parameters.
+    sequence_length : tensor, array or None
+        The sequence length of each row of input data, see ``Advanced Ops for Dynamic RNN``.
+            - If None, it uses ``retrieve_seq_length_op`` to compute the sequence length, i.e. when the padding features (on the right hand side) are all zeros.
+            - If using word embedding, you may need to compute the sequence length from the ID array (the integer features before word embedding) by using ``retrieve_seq_length_op2`` or ``retrieve_seq_length_op``.
+            - You can also input a numpy array.
+            - More details about TensorFlow dynamic RNN can be found in the `Wild-ML Blog `__.
+    initial_state : None or RNN State
+        If None, `initial_state` is zero state.
+    dropout : float or tuple of float
+        The input and output keep probability (input_keep_prob, output_keep_prob).
+            - If a single float, the input and output keep probabilities are the same.
+    n_layer : int
+        The number of RNN layers, default is 1.
+    return_last : boolean
+        Whether to return the last output or all outputs in each step.
+            - If True, return the last output, "Sequence input and single output".
+            - If False, return all outputs, "Synced sequence input and output".
+            - In other words, if you want to stack more RNNs on this layer, set to False.
+    return_seq_2d : boolean
+        Only consider this argument when `return_last` is `False`
+            - If True, return 2D Tensor [n_example, n_hidden], for stacking DenseLayer after it.
+            - If False, return 3D Tensor [n_example/n_steps, n_steps, n_hidden], for stacking multiple RNN after it.
+    dynamic_rnn_init_args : dictionary
+        The arguments for ``tf.nn.dynamic_rnn``.
+    name : str
+        A unique layer name.
+
+    Attributes
+    ------------
+    outputs : tensor
+        The output of this layer.
+
+    final_state : tensor or StateTuple
+        The final state of this layer.
+            - When `state_is_tuple` is `False`, it is the final hidden and cell states, `states.get_shape() = [?, 2 * n_hidden]`.
+            - When `state_is_tuple` is `True`, it stores two elements: `(c, h)`.
+            - In practice, you can get the final state after each iteration during training, then feed it to the initial state of the next iteration.
+
+    initial_state : tensor or StateTuple
+        The initial state of this layer.
+            - In practice, you can set your state at the beginning of each epoch or iteration according to your training procedure.
+
+    batch_size : int or tensor
+        An integer, if the batch size can be inferred statically; otherwise, a tensor for the dynamic batch size.
+ + sequence_length : a tensor or array + The sequence lengths computed by Advanced Opt or the given sequence lengths, [batch_size] + + Notes + ----- + Input dimension should be rank 3 : [batch_size, n_steps(max), n_features], if no, please see :class:`ReshapeLayer`. + + Examples + -------- + Synced sequence input and output, for loss function see ``tl.cost.cross_entropy_seq_with_mask``. + + >>> input_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="input") + >>> net = tl.layers.EmbeddingInputlayer( + ... inputs = input_seqs, + ... vocabulary_size = vocab_size, + ... embedding_size = embedding_size, + ... name = 'seq_embedding') + >>> net = tl.layers.DynamicRNNLayer(net, + ... cell_fn = tf.contrib.rnn.BasicLSTMCell, # for TF0.2 use tf.nn.rnn_cell.BasicLSTMCell, + ... n_hidden = embedding_size, + ... dropout = (0.7 if is_train else None), + ... sequence_length = tl.layers.retrieve_seq_length_op2(input_seqs), + ... return_seq_2d = True, # stack denselayer or compute cost after it + ... name = 'dynamicrnn') + ... net = tl.layers.DenseLayer(net, n_units=vocab_size, name="output") + + References + ---------- + - `Wild-ML Blog `__ + - `dynamic_rnn.ipynb `__ + - `tf.nn.dynamic_rnn `__ + - `tflearn rnn `__ + - ``tutorial_dynamic_rnn.py`` + + """ + + def __init__( + self, + layer, + cell_fn, #tf.nn.rnn_cell.LSTMCell, + cell_init_args={'state_is_tuple': True}, + n_hidden=256, + initializer=tf.random_uniform_initializer(-0.1, 0.1), + sequence_length=None, + initial_state=None, + dropout=None, + n_layer=1, + return_last=False, + return_seq_2d=False, + dynamic_rnn_init_args={}, + name='dyrnn', + ): + Layer.__init__(self, name=name) + if cell_fn is None: + raise Exception("Please put in cell_fn") + if 'GRU' in cell_fn.__name__: + try: + cell_init_args.pop('state_is_tuple') + except: + pass + self.inputs = layer.outputs + + logging.info("DynamicRNNLayer %s: n_hidden:%d, in_dim:%d in_shape:%s cell_fn:%s dropout:%s n_layer:%d" % + (self.name, n_hidden, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__, dropout, n_layer)) + + # Input dimension should be rank 3 [batch_size, n_steps(max), n_features] + try: + self.inputs.get_shape().with_rank(3) + except: + raise Exception("RNN : Input dimension should be rank 3 : [batch_size, n_steps(max), n_features]") + + # Get the batch_size + fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0] + if fixed_batch_size.value: + batch_size = fixed_batch_size.value + logging.info(" batch_size (concurrent processes): %d" % batch_size) + else: + from tensorflow.python.ops import array_ops + batch_size = array_ops.shape(self.inputs)[0] + logging.info(" non specified batch_size, uses a tensor instead.") + self.batch_size = batch_size + + # Creats the cell function + # cell_instance_fn=lambda: cell_fn(num_units=n_hidden, **cell_init_args) # HanSheng + rnn_creator = lambda: cell_fn(num_units=n_hidden, **cell_init_args) + + # Apply dropout + if dropout: + if type(dropout) in [tuple, list]: + in_keep_prob = dropout[0] + out_keep_prob = dropout[1] + elif isinstance(dropout, float): + in_keep_prob, out_keep_prob = dropout, dropout + else: + raise Exception("Invalid dropout type (must be a 2-D tuple of " "float)") + try: # TF1.0 + DropoutWrapper_fn = tf.contrib.rnn.DropoutWrapper + except: + DropoutWrapper_fn = tf.nn.rnn_cell.DropoutWrapper + + # cell_instance_fn1=cell_instance_fn # HanSheng + # cell_instance_fn=DropoutWrapper_fn( + # cell_instance_fn1(), + # input_keep_prob=in_keep_prob, + # output_keep_prob=out_keep_prob) + 
cell_creator = lambda: DropoutWrapper_fn(rnn_creator(), input_keep_prob=in_keep_prob, output_keep_prob=1.0) #out_keep_prob) + else: + cell_creator = rnn_creator + self.cell = cell_creator() + # Apply multiple layers + if n_layer > 1: + try: + MultiRNNCell_fn = tf.contrib.rnn.MultiRNNCell + except: + MultiRNNCell_fn = tf.nn.rnn_cell.MultiRNNCell + + # cell_instance_fn2=cell_instance_fn # HanSheng + try: + # cell_instance_fn=lambda: MultiRNNCell_fn([cell_instance_fn2() for _ in range(n_layer)], state_is_tuple=True) # HanSheng + self.cell = MultiRNNCell_fn([cell_creator() for _ in range(n_layer)], state_is_tuple=True) + except: # when GRU + # cell_instance_fn=lambda: MultiRNNCell_fn([cell_instance_fn2() for _ in range(n_layer)]) # HanSheng + self.cell = MultiRNNCell_fn([cell_creator() for _ in range(n_layer)]) + + # self.cell=cell_instance_fn() # HanSheng + + # Initialize initial_state + if initial_state is None: + self.initial_state = self.cell.zero_state(batch_size, dtype=D_TYPE) # dtype=tf.float32) + else: + self.initial_state = initial_state + + # Computes sequence_length + if sequence_length is None: + try: # TF1.0 + sequence_length = retrieve_seq_length_op(self.inputs if isinstance(self.inputs, tf.Tensor) else tf.stack(self.inputs)) + except: # TF0.12 + sequence_length = retrieve_seq_length_op(self.inputs if isinstance(self.inputs, tf.Tensor) else tf.pack(self.inputs)) + + # Main - Computes outputs and last_states + with tf.variable_scope(name, initializer=initializer) as vs: + outputs, last_states = tf.nn.dynamic_rnn( + cell=self.cell, + # inputs=X + inputs=self.inputs, + # dtype=tf.float64, + sequence_length=sequence_length, + initial_state=self.initial_state, + **dynamic_rnn_init_args) + rnn_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) + + # logging.info(" n_params : %d" % (len(rnn_variables))) + # Manage the outputs + if return_last: + # [batch_size, n_hidden] + # outputs = tf.transpose(tf.pack(outputs), [1, 0, 2]) # TF1.0 tf.pack --> tf.stack + self.outputs = advanced_indexing_op(outputs, sequence_length) + else: + # [batch_size, n_step(max), n_hidden] + # self.outputs = result[0]["outputs"] + # self.outputs = outputs # it is 3d, but it is a list + if return_seq_2d: + # PTB tutorial: + # 2D Tensor [n_example, n_hidden] + try: # TF1.0 + self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, n_hidden]) + except: # TF0.12 + self.outputs = tf.reshape(tf.concat(1, outputs), [-1, n_hidden]) + else: + # : + # 3D Tensor [batch_size, n_steps(max), n_hidden] + max_length = tf.shape(outputs)[1] + batch_size = tf.shape(outputs)[0] + + try: # TF1.0 + self.outputs = tf.reshape(tf.concat(outputs, 1), [batch_size, max_length, n_hidden]) + except: # TF0.12 + self.outputs = tf.reshape(tf.concat(1, outputs), [batch_size, max_length, n_hidden]) + # self.outputs = tf.reshape(tf.concat(1, outputs), [-1, max_length, n_hidden]) + + # Final state + self.final_state = last_states + + self.sequence_length = sequence_length + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + + self.all_layers.extend([self.outputs]) + self.all_params.extend(rnn_variables) + + +class BiDynamicRNNLayer(Layer): + """ + The :class:`BiDynamicRNNLayer` class is a RNN layer, you can implement vanilla RNN, + LSTM and GRU with it. + + Parameters + ---------- + layer : :class:`Layer` + Previous layer. + cell_fn : TensorFlow cell function + A TensorFlow core RNN cell + - See `RNN Cells in TensorFlow `__. 
+            - Note TF1.0+ and TF1.0- are different.
+    cell_init_args : dictionary
+        The arguments for the cell function.
+    n_hidden : int
+        The number of hidden units in the layer.
+    initializer : initializer
+        The initializer for initializing the parameters.
+    sequence_length : tensor, array or None
+        The sequence length of each row of input data, see ``Advanced Ops for Dynamic RNN``.
+            - If None, it uses ``retrieve_seq_length_op`` to compute the sequence length, i.e. when the padding features (on the right hand side) are all zeros.
+            - If using word embedding, you may need to compute the sequence length from the ID array (the integer features before word embedding) by using ``retrieve_seq_length_op2`` or ``retrieve_seq_length_op``.
+            - You can also input a numpy array.
+            - More details about TensorFlow dynamic RNN can be found in the `Wild-ML Blog `__.
+    fw_initial_state : None or forward RNN State
+        If None, `initial_state` is zero state.
+    bw_initial_state : None or backward RNN State
+        If None, `initial_state` is zero state.
+    dropout : float or tuple of float
+        The input and output keep probability (input_keep_prob, output_keep_prob).
+            - If a single float, the input and output keep probabilities are the same.
+    n_layer : int
+        The number of RNN layers, default is 1.
+    return_last : boolean
+        Whether to return the last output or all outputs in each step.
+            - If True, return the last output, "Sequence input and single output".
+            - If False, return all outputs, "Synced sequence input and output".
+            - In other words, if you want to stack more RNNs on this layer, set to False.
+    return_seq_2d : boolean
+        Only consider this argument when `return_last` is `False`
+            - If True, return 2D Tensor [n_example, 2 * n_hidden], for stacking DenseLayer after it.
+            - If False, return 3D Tensor [n_example/n_steps, n_steps, 2 * n_hidden], for stacking multiple RNN after it.
+    dynamic_rnn_init_args : dictionary
+        The arguments for ``tf.nn.bidirectional_dynamic_rnn``.
+    name : str
+        A unique layer name.
+
+    Attributes
+    ----------
+    outputs : tensor
+        The output of this layer. (?, 2 * n_hidden)
+
+    fw(bw)_final_state : tensor or StateTuple
+        The final state of this layer.
+            - When `state_is_tuple` is `False`, it is the final hidden and cell states, `states.get_shape() = [?, 2 * n_hidden]`.
+            - When `state_is_tuple` is `True`, it stores two elements: `(c, h)`.
+            - In practice, you can get the final state after each iteration during training, then feed it to the initial state of the next iteration.
+
+    fw(bw)_initial_state : tensor or StateTuple
+        The initial state of this layer.
+            - In practice, you can set your state at the beginning of each epoch or iteration according to your training procedure.
+
+    batch_size : int or tensor
+        An integer, if the batch size can be inferred statically; otherwise, a tensor for the dynamic batch size.
+
+    sequence_length : a tensor or array
+        The sequence lengths computed by the Advanced Ops or the given sequence lengths, [batch_size].
+
+    Notes
+    -----
+    Input dimension should be rank 3 : [batch_size, n_steps(max), n_features]. If not, please see :class:`ReshapeLayer`.
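+
+    Examples
+    --------
+    A minimal sketch, modelled on the :class:`DynamicRNNLayer` example above
+    (``vocab_size``, ``embedding_size`` and ``is_train`` are assumed to be defined):
+
+    >>> input_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="input")
+    >>> net = tl.layers.EmbeddingInputlayer(
+    ...     inputs = input_seqs,
+    ...     vocabulary_size = vocab_size,
+    ...     embedding_size = embedding_size,
+    ...     name = 'seq_embedding')
+    >>> net = tl.layers.BiDynamicRNNLayer(net,
+    ...     cell_fn = tf.contrib.rnn.BasicLSTMCell,
+    ...     n_hidden = embedding_size,
+    ...     dropout = (0.7 if is_train else None),
+    ...     sequence_length = tl.layers.retrieve_seq_length_op2(input_seqs),
+    ...     return_seq_2d = True,
+    ...     name = 'bidynamicrnn')
+    >>> net = tl.layers.DenseLayer(net, n_units=vocab_size, name="output")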
+ + + References + ---------- + - `Wild-ML Blog `__ + - `bidirectional_rnn.ipynb `__ + + """ + + def __init__( + self, + layer, + cell_fn, #tf.nn.rnn_cell.LSTMCell, + cell_init_args={'state_is_tuple': True}, + n_hidden=256, + initializer=tf.random_uniform_initializer(-0.1, 0.1), + sequence_length=None, + fw_initial_state=None, + bw_initial_state=None, + dropout=None, + n_layer=1, + return_last=False, + return_seq_2d=False, + dynamic_rnn_init_args={}, + name='bi_dyrnn_layer', + ): + Layer.__init__(self, name=name) + if cell_fn is None: + raise Exception("Please put in cell_fn") + if 'GRU' in cell_fn.__name__: + try: + cell_init_args.pop('state_is_tuple') + except: + pass + self.inputs = layer.outputs + + logging.info("BiDynamicRNNLayer %s: n_hidden:%d in_dim:%d in_shape:%s cell_fn:%s dropout:%s n_layer:%d" % + (self.name, n_hidden, self.inputs.get_shape().ndims, self.inputs.get_shape(), cell_fn.__name__, dropout, n_layer)) + + # Input dimension should be rank 3 [batch_size, n_steps(max), n_features] + try: + self.inputs.get_shape().with_rank(3) + except: + raise Exception("RNN : Input dimension should be rank 3 : [batch_size, n_steps(max), n_features]") + + # Get the batch_size + fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0] + if fixed_batch_size.value: + batch_size = fixed_batch_size.value + logging.info(" batch_size (concurrent processes): %d" % batch_size) + else: + from tensorflow.python.ops import array_ops + batch_size = array_ops.shape(self.inputs)[0] + logging.info(" non specified batch_size, uses a tensor instead.") + self.batch_size = batch_size + + with tf.variable_scope(name, initializer=initializer) as vs: + # Creats the cell function + # cell_instance_fn=lambda: cell_fn(num_units=n_hidden, **cell_init_args) # HanSheng + rnn_creator = lambda: cell_fn(num_units=n_hidden, **cell_init_args) + + # Apply dropout + if dropout: + if type(dropout) in [tuple, list]: + in_keep_prob = dropout[0] + out_keep_prob = dropout[1] + elif isinstance(dropout, float): + in_keep_prob, out_keep_prob = dropout, dropout + else: + raise Exception("Invalid dropout type (must be a 2-D tuple of " "float)") + try: + DropoutWrapper_fn = tf.contrib.rnn.DropoutWrapper + except: + DropoutWrapper_fn = tf.nn.rnn_cell.DropoutWrapper + + # cell_instance_fn1=cell_instance_fn # HanSheng + # cell_instance_fn=lambda: DropoutWrapper_fn( + # cell_instance_fn1(), + # input_keep_prob=in_keep_prob, + # output_keep_prob=out_keep_prob) + cell_creator = lambda is_last=True: \ + DropoutWrapper_fn(rnn_creator(), + input_keep_prob=in_keep_prob, + output_keep_prob=out_keep_prob if is_last else 1.0) # out_keep_prob) + else: + cell_creator = lambda: rnn_creator() + + # if dropout: + # self.fw_cell = DropoutWrapper_fn(self.fw_cell, input_keep_prob=1.0, output_keep_prob=out_keep_prob) + # self.bw_cell = DropoutWrapper_fn(self.bw_cell, input_keep_prob=1.0, output_keep_prob=out_keep_prob) + + # self.fw_cell=cell_instance_fn() + # self.bw_cell=cell_instance_fn() + # Initial state of RNN + + self.fw_initial_state = fw_initial_state + self.bw_initial_state = bw_initial_state + # Computes sequence_length + if sequence_length is None: + try: # TF1.0 + sequence_length = retrieve_seq_length_op(self.inputs if isinstance(self.inputs, tf.Tensor) else tf.stack(self.inputs)) + except: # TF0.12 + sequence_length = retrieve_seq_length_op(self.inputs if isinstance(self.inputs, tf.Tensor) else tf.pack(self.inputs)) + + if n_layer > 1: + self.fw_cell = [cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)] + self.bw_cell 
= [cell_creator(is_last=i == n_layer - 1) for i in range(n_layer)] + from tensorflow.contrib.rnn import stack_bidirectional_dynamic_rnn + outputs, states_fw, states_bw = stack_bidirectional_dynamic_rnn( + cells_fw=self.fw_cell, + cells_bw=self.bw_cell, + inputs=self.inputs, + sequence_length=sequence_length, + initial_states_fw=self.fw_initial_state, + initial_states_bw=self.bw_initial_state, + dtype=D_TYPE, + **dynamic_rnn_init_args) + + else: + self.fw_cell = cell_creator() + self.bw_cell = cell_creator() + outputs, (states_fw, states_bw) = tf.nn.bidirectional_dynamic_rnn( + cell_fw=self.fw_cell, + cell_bw=self.bw_cell, + inputs=self.inputs, + sequence_length=sequence_length, + initial_state_fw=self.fw_initial_state, + initial_state_bw=self.bw_initial_state, + dtype=D_TYPE, + **dynamic_rnn_init_args) + + rnn_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) + + logging.info(" n_params : %d" % (len(rnn_variables))) + # Manage the outputs + try: # TF1.0 + outputs = tf.concat(outputs, 2) + except: # TF0.12 + outputs = tf.concat(2, outputs) + if return_last: + # [batch_size, 2 * n_hidden] + raise Exception("Do not support return_last at the moment") + self.outputs = advanced_indexing_op(outputs, sequence_length) + else: + # [batch_size, n_step(max), 2 * n_hidden] + if return_seq_2d: + # PTB tutorial: + # 2D Tensor [n_example, 2 * n_hidden] + try: # TF1.0 + self.outputs = tf.reshape(tf.concat(outputs, 1), [-1, 2 * n_hidden]) + except: # TF0.12 + self.outputs = tf.reshape(tf.concat(1, outputs), [-1, 2 * n_hidden]) + else: + # : + # 3D Tensor [batch_size, n_steps(max), 2 * n_hidden] + max_length = tf.shape(outputs)[1] + batch_size = tf.shape(outputs)[0] + try: # TF1.0 + self.outputs = tf.reshape(tf.concat(outputs, 1), [batch_size, max_length, 2 * n_hidden]) + except: # TF0.12 + self.outputs = tf.reshape(tf.concat(1, outputs), [batch_size, max_length, 2 * n_hidden]) + # self.outputs = tf.reshape(tf.concat(1, outputs), [-1, max_length, 2 * n_hidden]) + + # Final state + self.fw_final_states = states_fw + self.bw_final_states = states_bw + + self.sequence_length = sequence_length + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + + self.all_layers.extend([self.outputs]) + self.all_params.extend(rnn_variables) + + +# Seq2seq +class Seq2Seq(Layer): + """ + The :class:`Seq2Seq` class is a simple :class:`DynamicRNNLayer` based Seq2seq layer without using `tl.contrib.seq2seq `__. + See `Model `__ + and `Sequence to Sequence Learning with Neural Networks `__. + + - Please check this example `Chatbot in 200 lines of code `__. + - The Author recommends users to read the source code of :class:`DynamicRNNLayer` and :class:`Seq2Seq`. + + Parameters + ---------- + net_encode_in : :class:`Layer` + Encode sequences, [batch_size, None, n_features]. + net_decode_in : :class:`Layer` + Decode sequences, [batch_size, None, n_features]. + cell_fn : TensorFlow cell function + A TensorFlow core RNN cell + - see `RNN Cells in TensorFlow `__ + - Note TF1.0+ and TF1.0- are different + cell_init_args : dictionary + The arguments for the cell initializer. + n_hidden : int + The number of hidden units in the layer. + initializer : initializer + The initializer for the parameters. + encode_sequence_length : tensor + For encoder sequence length, see :class:`DynamicRNNLayer` . + decode_sequence_length : tensor + For decoder sequence length, see :class:`DynamicRNNLayer` . 
+ initial_state_encode : None or RNN state + If None, `initial_state_encode` is zero state, it can be set by placeholder or other RNN. + initial_state_decode : None or RNN state + If None, `initial_state_decode` is the final state of the RNN encoder, it can be set by placeholder or other RNN. + dropout : tuple of float or int + The input and output keep probability (input_keep_prob, output_keep_prob). + - If one int, input and output keep probability are the same. + n_layer : int + The number of RNN layers, default is 1. + return_seq_2d : boolean + Only consider this argument when `return_last` is `False` + - If True, return 2D Tensor [n_example, 2 * n_hidden], for stacking DenseLayer after it. + - If False, return 3D Tensor [n_example/n_steps, n_steps, 2 * n_hidden], for stacking multiple RNN after it. + name : str + A unique layer name. + + Attributes + ------------ + outputs : tensor + The output of RNN decoder. + initial_state_encode : tensor or StateTuple + Initial state of RNN encoder. + initial_state_decode : tensor or StateTuple + Initial state of RNN decoder. + final_state_encode : tensor or StateTuple + Final state of RNN encoder. + final_state_decode : tensor or StateTuple + Final state of RNN decoder. + + Notes + -------- + - How to feed data: `Sequence to Sequence Learning with Neural Networks `__ + - input_seqs : ``['how', 'are', 'you', '']`` + - decode_seqs : ``['', 'I', 'am', 'fine', '']`` + - target_seqs : ``['I', 'am', 'fine', '', '']`` + - target_mask : ``[1, 1, 1, 1, 0]`` + - related functions : tl.prepro + + Examples + ---------- + >>> from tensorlayer.layers import * + >>> batch_size = 32 + >>> encode_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="encode_seqs") + >>> decode_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="decode_seqs") + >>> target_seqs = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="target_seqs") + >>> target_mask = tf.placeholder(dtype=tf.int64, shape=[batch_size, None], name="target_mask") # tl.prepro.sequences_get_mask() + >>> with tf.variable_scope("model"): + ... # for chatbot, you can use the same embedding layer, + ... # for translation, you may want to use 2 seperated embedding layers + >>> with tf.variable_scope("embedding") as vs: + >>> net_encode = EmbeddingInputlayer( + ... inputs = encode_seqs, + ... vocabulary_size = 10000, + ... embedding_size = 200, + ... name = 'seq_embedding') + >>> vs.reuse_variables() + >>> tl.layers.set_name_reuse(True) + >>> net_decode = EmbeddingInputlayer( + ... inputs = decode_seqs, + ... vocabulary_size = 10000, + ... embedding_size = 200, + ... name = 'seq_embedding') + >>> net = Seq2Seq(net_encode, net_decode, + ... cell_fn = tf.contrib.rnn.BasicLSTMCell, + ... n_hidden = 200, + ... initializer = tf.random_uniform_initializer(-0.1, 0.1), + ... encode_sequence_length = retrieve_seq_length_op2(encode_seqs), + ... decode_sequence_length = retrieve_seq_length_op2(decode_seqs), + ... initial_state_encode = None, + ... dropout = None, + ... n_layer = 1, + ... return_seq_2d = True, + ... 
name = 'seq2seq') + >>> net_out = DenseLayer(net, n_units=10000, act=tf.identity, name='output') + >>> e_loss = tl.cost.cross_entropy_seq_with_mask(logits=net_out.outputs, target_seqs=target_seqs, input_mask=target_mask, return_details=False, name='cost') + >>> y = tf.nn.softmax(net_out.outputs) + >>> net_out.print_params(False) + + """ + + def __init__( + self, + net_encode_in, + net_decode_in, + cell_fn, #tf.nn.rnn_cell.LSTMCell, + cell_init_args={'state_is_tuple': True}, + n_hidden=256, + initializer=tf.random_uniform_initializer(-0.1, 0.1), + encode_sequence_length=None, + decode_sequence_length=None, + initial_state_encode=None, + initial_state_decode=None, + dropout=None, + n_layer=1, + return_seq_2d=False, + name='seq2seq', + ): + Layer.__init__(self, name=name) + if cell_fn is None: + raise Exception("Please put in cell_fn") + if 'GRU' in cell_fn.__name__: + try: + cell_init_args.pop('state_is_tuple') + except: + pass + # self.inputs = layer.outputs + logging.info(" [**] Seq2Seq %s: n_hidden:%d cell_fn:%s dropout:%s n_layer:%d" % (self.name, n_hidden, cell_fn.__name__, dropout, n_layer)) + + with tf.variable_scope(name) as vs: #, reuse=reuse): + # tl.layers.set_name_reuse(reuse) + # network = InputLayer(self.inputs, name=name+'/input') + network_encode = DynamicRNNLayer( + net_encode_in, + cell_fn=cell_fn, + cell_init_args=cell_init_args, + n_hidden=n_hidden, + initializer=initializer, + initial_state=initial_state_encode, + dropout=dropout, + n_layer=n_layer, + sequence_length=encode_sequence_length, + return_last=False, + return_seq_2d=True, + name=name + '_encode') + # vs.reuse_variables() + # tl.layers.set_name_reuse(True) + network_decode = DynamicRNNLayer( + net_decode_in, + cell_fn=cell_fn, + cell_init_args=cell_init_args, + n_hidden=n_hidden, + initializer=initializer, + initial_state=(network_encode.final_state if initial_state_decode is None else initial_state_decode), + dropout=dropout, + n_layer=n_layer, + sequence_length=decode_sequence_length, + return_last=False, + return_seq_2d=return_seq_2d, + name=name + '_decode') + self.outputs = network_decode.outputs + + # rnn_variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) + + # Initial state + self.initial_state_encode = network_encode.initial_state + self.initial_state_decode = network_decode.initial_state + + # Final state + self.final_state_encode = network_encode.final_state + self.final_state_decode = network_decode.final_state + + # self.sequence_length = sequence_length + self.all_layers = list(network_encode.all_layers) + self.all_params = list(network_encode.all_params) + self.all_drop = dict(network_encode.all_drop) + + self.all_layers.extend(list(network_decode.all_layers)) + self.all_params.extend(list(network_decode.all_params)) + self.all_drop.update(dict(network_decode.all_drop)) + + self.all_layers.extend([self.outputs]) + # self.all_params.extend( rnn_variables ) + + self.all_layers = list_remove_repeat(self.all_layers) + self.all_params = list_remove_repeat(self.all_params) diff --git a/tensorlayer/layers/shape.py b/tensorlayer/layers/shape.py new file mode 100644 index 00000000..bc83889a --- /dev/null +++ b/tensorlayer/layers/shape.py @@ -0,0 +1,129 @@ +# -*- coding: utf-8 -*- + +from .core import * + + +class FlattenLayer(Layer): + """A layer that reshapes high-dimension input into a vector. + + Then we often apply DenseLayer, RNNLayer, ConcatLayer and etc on the top of a flatten layer. 
+ [batch_size, mask_row, mask_col, n_mask] ---> [batch_size, mask_row * mask_col * n_mask] + + Parameters + ---------- + layer : :class:`Layer` + Previous layer. + name : str + A unique layer name. + + Examples + -------- + >>> x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1]) + >>> net = tl.layers.InputLayer(x, name='input') + >>> net = tl.layers.FlattenLayer(net, name='flatten') + + """ + + def __init__( + self, + layer, + name='flatten_layer', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + self.outputs = flatten_reshape(self.inputs, name=name) + self.n_units = int(self.outputs.get_shape()[-1]) + logging.info("FlattenLayer %s: %d" % (self.name, self.n_units)) + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) + + +class ReshapeLayer(Layer): + """A layer that reshapes a given tensor. + + Parameters + ---------- + layer : :class:`Layer` + Previous layer + shape : tuple of int + The output shape, see ``tf.reshape``. + name : str + A unique layer name. + + Examples + -------- + Use TensorLayer + + >>> x = tf.placeholder(tf.float32, shape=(None, 28, 28, 1)) + >>> net = tl.layers.InputLayer(x, name='input') + >>> net = tl.layers.ReshapeLayer(net, (-1, 28*28), name='reshape') + >>> print(net.outputs) + ... (?, 784) + + Use native TensorFlow API ``tf.reshape`` + + >>> x = tf.placeholder(tf.float32, shape=[None, 3]) + >>> y = tf.reshape(x, shape=[-1, 3, 3]) + >>> sess = tf.InteractiveSession() + >>> print(sess.run(y, feed_dict={x:[[1,1,1],[2,2,2],[3,3,3],[4,4,4],[5,5,5],[6,6,6]]})) + ... [[[ 1. 1. 1.] + ... [ 2. 2. 2.] + ... [ 3. 3. 3.]] + ... [[ 4. 4. 4.] + ... [ 5. 5. 5.] + ... [ 6. 6. 6.]]] + + """ + + def __init__( + self, + layer, + shape, + name='reshape_layer', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + self.outputs = tf.reshape(self.inputs, shape=shape, name=name) + logging.info("ReshapeLayer %s: %s" % (self.name, self.outputs.get_shape())) + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) + + +class TransposeLayer(Layer): + """A layer that transposes the dimension of a tensor. + + See `tf.transpose() `__ . + + Parameters + ---------- + layer : :class:`Layer` + Previous layer + perm: list of int + The permutation of the dimensions, similar with ``numpy.transpose``. + name : str + A unique layer name. 
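+
+    Examples
+    --------
+    A usage sketch, permuting NHWC input to NCHW:
+
+    >>> x = tf.placeholder(tf.float32, shape=[None, 28, 28, 1])
+    >>> net = tl.layers.InputLayer(x, name='input')
+    >>> net = tl.layers.TransposeLayer(net, perm=[0, 3, 1, 2], name='transpose')
+    >>> print(net.outputs.get_shape())
+    ... (?, 1, 28, 28)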
+ + """ + + def __init__( + self, + layer, + perm, + name='transpose', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + assert perm is not None + + logging.info("TransposeLayer %s: perm:%s" % (self.name, perm)) + # with tf.variable_scope(name) as vs: + self.outputs = tf.transpose(self.inputs, perm=perm, name=name) + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) + # self.all_params.extend( variables ) diff --git a/tensorlayer/layers/spatial_transformer.py b/tensorlayer/layers/spatial_transformer.py new file mode 100644 index 00000000..0f5414fd --- /dev/null +++ b/tensorlayer/layers/spatial_transformer.py @@ -0,0 +1,281 @@ +# -*- coding: utf-8 -*- + +from six.moves import xrange + +from .core import * + + +def transformer(U, theta, out_size, name='SpatialTransformer2dAffine'): + """Spatial Transformer Layer for `2D Affine Transformation `__ + , see :class:`SpatialTransformer2dAffineLayer` class. + + Parameters + ---------- + U : list of float + The output of a convolutional net should have the + shape [num_batch, height, width, num_channels]. + theta: float + The output of the localisation network should be [num_batch, 6], value range should be [0, 1] (via tanh). + out_size: tuple of int + The size of the output of the network (height, width) + name: str + Optional function name + + Returns + ------- + Tensor + The transformed tensor. + + References + ---------- + - `Spatial Transformer Networks `__ + - `TensorFlow/Models `__ + + Notes + ----- + To initialize the network to the identity transform init. + + >>> ``theta`` to + >>> identity = np.array([[1., 0., 0.], + ... [0., 1., 0.]]) + >>> identity = identity.flatten() + >>> theta = tf.Variable(initial_value=identity) + + """ + + def _repeat(x, n_repeats): + with tf.variable_scope('_repeat'): + rep = tf.transpose(tf.expand_dims(tf.ones(shape=tf.stack([ + n_repeats, + ])), 1), [1, 0]) + rep = tf.cast(rep, 'int32') + x = tf.matmul(tf.reshape(x, (-1, 1)), rep) + return tf.reshape(x, [-1]) + + def _interpolate(im, x, y, out_size): + with tf.variable_scope('_interpolate'): + # constants + num_batch = tf.shape(im)[0] + height = tf.shape(im)[1] + width = tf.shape(im)[2] + channels = tf.shape(im)[3] + + x = tf.cast(x, 'float32') + y = tf.cast(y, 'float32') + height_f = tf.cast(height, 'float32') + width_f = tf.cast(width, 'float32') + out_height = out_size[0] + out_width = out_size[1] + zero = tf.zeros([], dtype='int32') + max_y = tf.cast(tf.shape(im)[1] - 1, 'int32') + max_x = tf.cast(tf.shape(im)[2] - 1, 'int32') + + # scale indices from [-1, 1] to [0, width/height] + x = (x + 1.0) * (width_f) / 2.0 + y = (y + 1.0) * (height_f) / 2.0 + + # do sampling + x0 = tf.cast(tf.floor(x), 'int32') + x1 = x0 + 1 + y0 = tf.cast(tf.floor(y), 'int32') + y1 = y0 + 1 + + x0 = tf.clip_by_value(x0, zero, max_x) + x1 = tf.clip_by_value(x1, zero, max_x) + y0 = tf.clip_by_value(y0, zero, max_y) + y1 = tf.clip_by_value(y1, zero, max_y) + dim2 = width + dim1 = width * height + base = _repeat(tf.range(num_batch) * dim1, out_height * out_width) + base_y0 = base + y0 * dim2 + base_y1 = base + y1 * dim2 + idx_a = base_y0 + x0 + idx_b = base_y1 + x0 + idx_c = base_y0 + x1 + idx_d = base_y1 + x1 + + # use indices to lookup pixels in the flat image and restore + # channels dim + im_flat = tf.reshape(im, tf.stack([-1, channels])) + im_flat = tf.cast(im_flat, 'float32') + Ia = tf.gather(im_flat, idx_a) + Ib = tf.gather(im_flat, idx_b) + Ic = 
tf.gather(im_flat, idx_c) + Id = tf.gather(im_flat, idx_d) + + # and finally calculate interpolated values + x0_f = tf.cast(x0, 'float32') + x1_f = tf.cast(x1, 'float32') + y0_f = tf.cast(y0, 'float32') + y1_f = tf.cast(y1, 'float32') + wa = tf.expand_dims(((x1_f - x) * (y1_f - y)), 1) + wb = tf.expand_dims(((x1_f - x) * (y - y0_f)), 1) + wc = tf.expand_dims(((x - x0_f) * (y1_f - y)), 1) + wd = tf.expand_dims(((x - x0_f) * (y - y0_f)), 1) + output = tf.add_n([wa * Ia, wb * Ib, wc * Ic, wd * Id]) + return output + + def _meshgrid(height, width): + with tf.variable_scope('_meshgrid'): + # This should be equivalent to: + # x_t, y_t = np.meshgrid(np.linspace(-1, 1, width), + # np.linspace(-1, 1, height)) + # ones = np.ones(np.prod(x_t.shape)) + # grid = np.vstack([x_t.flatten(), y_t.flatten(), ones]) + x_t = tf.matmul(tf.ones(shape=tf.stack([height, 1])), tf.transpose(tf.expand_dims(tf.linspace(-1.0, 1.0, width), 1), [1, 0])) + y_t = tf.matmul(tf.expand_dims(tf.linspace(-1.0, 1.0, height), 1), tf.ones(shape=tf.stack([1, width]))) + + x_t_flat = tf.reshape(x_t, (1, -1)) + y_t_flat = tf.reshape(y_t, (1, -1)) + + ones = tf.ones_like(x_t_flat) + grid = tf.concat(axis=0, values=[x_t_flat, y_t_flat, ones]) + return grid + + def _transform(theta, input_dim, out_size): + with tf.variable_scope('_transform'): + num_batch = tf.shape(input_dim)[0] + height = tf.shape(input_dim)[1] + width = tf.shape(input_dim)[2] + num_channels = tf.shape(input_dim)[3] + theta = tf.reshape(theta, (-1, 2, 3)) + theta = tf.cast(theta, 'float32') + + # grid of (x_t, y_t, 1), eq (1) in ref [1] + height_f = tf.cast(height, 'float32') + width_f = tf.cast(width, 'float32') + out_height = out_size[0] + out_width = out_size[1] + grid = _meshgrid(out_height, out_width) + grid = tf.expand_dims(grid, 0) + grid = tf.reshape(grid, [-1]) + grid = tf.tile(grid, tf.stack([num_batch])) + grid = tf.reshape(grid, tf.stack([num_batch, 3, -1])) + + # Transform A x (x_t, y_t, 1)^T -> (x_s, y_s) + T_g = tf.matmul(theta, grid) + x_s = tf.slice(T_g, [0, 0, 0], [-1, 1, -1]) + y_s = tf.slice(T_g, [0, 1, 0], [-1, 1, -1]) + x_s_flat = tf.reshape(x_s, [-1]) + y_s_flat = tf.reshape(y_s, [-1]) + + input_transformed = _interpolate(input_dim, x_s_flat, y_s_flat, out_size) + + output = tf.reshape(input_transformed, tf.stack([num_batch, out_height, out_width, num_channels])) + return output + + with tf.variable_scope(name): + output = _transform(theta, U, out_size) + return output + + +def batch_transformer(U, thetas, out_size, name='BatchSpatialTransformer2dAffine'): + """Batch Spatial Transformer function for `2D Affine Transformation `__. + + Parameters + ---------- + U : list of float + tensor of inputs [batch, height, width, num_channels] + thetas : list of float + a set of transformations for each input [batch, num_transforms, 6] + out_size : list of int + the size of the output [out_height, out_width] + name : str + optional function name + + Returns + ------ + float + Tensor of size [batch * num_transforms, out_height, out_width, num_channels] + + """ + with tf.variable_scope(name): + num_batch, num_transforms = map(int, thetas.get_shape().as_list()[:2]) + indices = [[i] * num_transforms for i in xrange(num_batch)] + input_repeated = tf.gather(U, tf.reshape(indices, [-1])) + return transformer(input_repeated, thetas, out_size) + + +class SpatialTransformer2dAffineLayer(Layer): + """The :class:`SpatialTransformer2dAffineLayer` class is a 2D `Spatial Transformer Layer `__ for + `2D Affine Transformation `__. 
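+
+    It learns a 6-dimensional ``theta`` from ``theta_layer`` and uses it to warp the
+    feature maps of the previous layer, producing outputs of spatial size ``out_size``.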
+
+    Parameters
+    -----------
+    layer : :class:`Layer`
+        Previous layer.
+    theta_layer : :class:`Layer`
+        The localisation network.
+        - We use a :class:`DenseLayer` to map it to a theta of size [batch, 6]; the value range is squashed to [-1, 1] via tanh.
+    out_size : tuple of int
+        The size of the output of the network (height, width); the feature maps will be resized to this.
+    name : str
+        A unique layer name.
+
+    References
+    -----------
+    - `Spatial Transformer Networks `__
+    - `TensorFlow/Models `__
+
+    """
+
+    def __init__(
+            self,
+            layer=None,
+            theta_layer=None,
+            out_size=[40, 40],
+            name='spatial_trans_2d_affine',
+    ):
+        Layer.__init__(self, name=name)
+        self.inputs = layer.outputs
+        self.theta_layer = theta_layer
+        logging.info("SpatialTransformer2dAffineLayer %s: in_size:%s out_size:%s" % (name, self.inputs.get_shape().as_list(), out_size))
+
+        with tf.variable_scope(name) as vs:
+            # 1. make the localisation network to [batch, 6] via Flatten and Dense.
+            if self.theta_layer.outputs.get_shape().ndims > 2:
+                self.theta_layer.outputs = flatten_reshape(self.theta_layer.outputs, 'flatten')
+            # 2. initialize the network to the identity transform
+            # 2.1 W
+            n_in = int(self.theta_layer.outputs.get_shape()[-1])
+            shape = (n_in, 6)
+            W = tf.get_variable(name='W', initializer=tf.zeros(shape), dtype=D_TYPE)
+            # 2.2 b
+            identity = tf.constant(np.array([[1., 0, 0], [0, 1., 0]]).astype('float32').flatten())
+            b = tf.get_variable(name='b', initializer=identity, dtype=D_TYPE)
+            # 2.3 transformation matrix
+            self.theta = tf.nn.tanh(tf.matmul(self.theta_layer.outputs, W) + b)
+            # 3. Spatial Transformer Sampling
+            # 3.1 transformation
+            self.outputs = transformer(self.inputs, self.theta, out_size=out_size)
+            # 3.2 automatically set batch_size and channels
+            # e.g. [?, 40, 40, ?] --> [64, 40, 40, 1] or [64, 20, 20, 4] / Hao Dong
+            fixed_batch_size = self.inputs.get_shape().with_rank_at_least(1)[0]
+            if fixed_batch_size.value:
+                batch_size = fixed_batch_size.value
+            else:
+                from tensorflow.python.ops import array_ops
+                batch_size = array_ops.shape(self.inputs)[0]
+            n_channels = self.inputs.get_shape().as_list()[-1]
+            self.outputs = tf.reshape(self.outputs, shape=[batch_size, out_size[0], out_size[1], n_channels])
+            # 4. Get all parameters
+            variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name)
+
+            # fixed
+            self.all_layers = list(layer.all_layers)
+            self.all_params = list(layer.all_params)
+            self.all_drop = dict(layer.all_drop)
+
+            # theta_layer
+            self.all_layers.extend(theta_layer.all_layers)
+            self.all_params.extend(theta_layer.all_params)
+            self.all_drop.update(theta_layer.all_drop)
+
+            # this layer
+            self.all_layers.extend([self.outputs])
+            self.all_params.extend(variables)
diff --git a/tensorlayer/layers/special_activation.py b/tensorlayer/layers/special_activation.py
new file mode 100644
index 00000000..1edcfd27
--- /dev/null
+++ b/tensorlayer/layers/special_activation.py
@@ -0,0 +1,58 @@
+# -*- coding: utf-8 -*-
+
+from .core import *
+
+
+class PReluLayer(Layer):
+    """
+    The :class:`PReluLayer` class is a Parametric Rectified Linear Unit layer.
+
+    Parameters
+    ----------
+    layer : :class:`Layer`
+        Previous layer.
+    channel_shared : boolean
+        If True, a single alpha is shared by all channels.
+    a_init : initializer
+        The initializer for initializing the alpha(s).
+    a_init_args : dictionary
+        The arguments for initializing the alpha(s).
+    name : str
+        A unique layer name.
+
+    References
+    -----------
+    - `Delving Deep into Rectifiers: Surpassing Human-Level Performance on ImageNet Classification `__
+
+    """
+
+    def __init__(
+            self,
+            layer,
+            channel_shared=False,
+            a_init=tf.constant_initializer(value=0.0),
+            a_init_args={},
+            # restore = True,
+            name="prelu_layer"):
+        Layer.__init__(self, name=name)
+        self.inputs = layer.outputs
+        logging.info("PReluLayer %s: channel_shared:%s" % (self.name, channel_shared))
+        if channel_shared:
+            w_shape = (1, )
+        else:
+            w_shape = int(self.inputs.get_shape()[-1])
+
+        # with tf.name_scope(name) as scope:
+        with tf.variable_scope(name) as vs:
+            alphas = tf.get_variable(name='alphas', shape=w_shape, initializer=a_init, dtype=D_TYPE, **a_init_args)
+            try:  # TF 1.0
+                self.outputs = tf.nn.relu(self.inputs) + tf.multiply(alphas, (self.inputs - tf.abs(self.inputs))) * 0.5
+            except:  # TF 0.12
+                self.outputs = tf.nn.relu(self.inputs) + tf.mul(alphas, (self.inputs - tf.abs(self.inputs))) * 0.5
+
+        self.all_layers = list(layer.all_layers)
+        self.all_params = list(layer.all_params)
+        self.all_drop = dict(layer.all_drop)
+
+        self.all_layers.extend([self.outputs])
+        self.all_params.extend([alphas])
diff --git a/tensorlayer/layers/stack.py b/tensorlayer/layers/stack.py
new file mode 100644
index 00000000..3480bf44
--- /dev/null
+++ b/tensorlayer/layers/stack.py
@@ -0,0 +1,97 @@
+# -*- coding: utf-8 -*-
+
+from .core import *
+
+
+class StackLayer(Layer):
+    """
+    The :class:`StackLayer` class is a layer for stacking a list of rank-R tensors into one rank-(R+1) tensor; see `tf.stack() `__.
+
+    Parameters
+    ----------
+    layers : list of :class:`Layer`
+        Previous layers to stack.
+    axis : int
+        The dimension along which to stack.
+    name : str
+        A unique layer name.
+
+    """
+
+    def __init__(
+            self,
+            layers,
+            axis=0,
+            name='stack',
+    ):
+        Layer.__init__(self, name=name)
+        self.inputs = []
+        for l in layers:
+            self.inputs.append(l.outputs)
+
+        self.outputs = tf.stack(self.inputs, axis=axis, name=name)
+
+        logging.info("StackLayer %s: axis: %d" % (self.name, axis))
+
+        self.all_layers = list(layers[0].all_layers)
+        self.all_params = list(layers[0].all_params)
+        self.all_drop = dict(layers[0].all_drop)
+
+        for i in range(1, len(layers)):
+            self.all_layers.extend(list(layers[i].all_layers))
+            self.all_params.extend(list(layers[i].all_params))
+            self.all_drop.update(dict(layers[i].all_drop))
+
+        self.all_layers = list_remove_repeat(self.all_layers)
+        self.all_params = list_remove_repeat(self.all_params)
+
+
+def unstack_layer(layer, num=None, axis=0, name='unstack'):
+    """
+    A layer for unstacking the given dimension of a rank-R tensor into rank-(R-1) tensors; see `tf.unstack() `__.
+
+    Parameters
+    ----------
+    layer : :class:`Layer`
+        Previous layer.
+    num : int or None
+        The length of the dimension `axis`. Automatically inferred if None (the default).
+    axis : int
+        The dimension along which to unstack.
+    name : str
+        A unique layer name.
+
+    Returns
+    -------
+    list of :class:`Layer`
+        The list of layer objects unstacked from the input.
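+
+    Examples
+    --------
+    A usage sketch (shapes are illustrative):
+
+    >>> x = tf.placeholder(tf.float32, shape=[None, 5, 10])
+    >>> net = tl.layers.InputLayer(x, name='input')
+    >>> nets = tl.layers.UnStackLayer(net, axis=1, name='unstack')  # a list of 5 layers
+    >>> print(nets[0].outputs.get_shape())
+    ... (?, 10)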
+ + """ + inputs = layer.outputs + with tf.variable_scope(name) as vs: + outputs = tf.unstack(inputs, num=num, axis=axis) + + logging.info("UnStackLayer %s: num: %s axis: %d, n_outputs: %d" % (name, num, axis, len(outputs))) + + net_new = [] + scope_name = tf.get_variable_scope().name + if scope_name: + whole_name = scope_name + '/' + name + else: + whole_name = name + + for i in range(len(outputs)): + n = Layer(None, name=whole_name + str(i)) + n.outputs = outputs[i] + n.all_layers = list(layer.all_layers) + n.all_params = list(layer.all_params) + n.all_drop = dict(layer.all_drop) + n.all_layers.extend([inputs]) + + net_new.append(n) + + return net_new + + +# Alias +UnStackLayer = unstack_layer diff --git a/tensorlayer/layers/super_resolution.py b/tensorlayer/layers/super_resolution.py new file mode 100644 index 00000000..2dd91409 --- /dev/null +++ b/tensorlayer/layers/super_resolution.py @@ -0,0 +1,166 @@ +# -*- coding: utf-8 -*- + +from .core import * + + +def subpixel_conv2d(net, scale=2, n_out_channel=None, act=tf.identity, name='subpixel_conv2d'): + """It is a 2D sub-pixel up-sampling layer, usually be used + for Super-Resolution applications, see `SRGAN `__ for example. + + Parameters + ------------ + net : :class:`Layer` + Previous layer, + scale : int + The up-scaling ratio, a wrong setting will lead to dimension size error. + n_out_channel : int or None + The number of output channels. + - If None, automatically set n_out_channel == the number of input channels / (scale x scale). + - The number of input channels == (scale x scale) x The number of output channels. + act : activation function + The activation function of this layer. + name : str + A unique layer name. + + Returns + ------- + :class:`Layer` + A 2D sub-pixel up-sampling layer + + Examples + --------- + >>> # examples here just want to tell you how to set the n_out_channel. + >>> x = np.random.rand(2, 16, 16, 4) + >>> X = tf.placeholder("float32", shape=(2, 16, 16, 4), name="X") + >>> net = InputLayer(X, name='input') + >>> net = SubpixelConv2d(net, scale=2, n_out_channel=1, name='subpixel_conv2d') + >>> y = sess.run(net.outputs, feed_dict={X: x}) + >>> print(x.shape, y.shape) + ... (2, 16, 16, 4) (2, 32, 32, 1) + >>> + >>> x = np.random.rand(2, 16, 16, 4*10) + >>> X = tf.placeholder("float32", shape=(2, 16, 16, 4*10), name="X") + >>> net = InputLayer(X, name='input2') + >>> net = SubpixelConv2d(net, scale=2, n_out_channel=10, name='subpixel_conv2d2') + >>> y = sess.run(net.outputs, feed_dict={X: x}) + >>> print(x.shape, y.shape) + ... (2, 16, 16, 40) (2, 32, 32, 10) + >>> + >>> x = np.random.rand(2, 16, 16, 25*10) + >>> X = tf.placeholder("float32", shape=(2, 16, 16, 25*10), name="X") + >>> net = InputLayer(X, name='input3') + >>> net = SubpixelConv2d(net, scale=5, n_out_channel=None, name='subpixel_conv2d3') + >>> y = sess.run(net.outputs, feed_dict={X: x}) + >>> print(x.shape, y.shape) + ... 
(2, 16, 16, 250) (2, 80, 80, 10) + + References + ------------ + - `Real-Time Single Image and Video Super-Resolution Using an Efficient Sub-Pixel Convolutional Neural Network `__ + + """ + # github/Tetrachrome/subpixel https://github.com/Tetrachrome/subpixel/blob/master/subpixel.py + + _err_log = "SubpixelConv2d: The number of input channels == (scale x scale) x The number of output channels" + + scope_name = tf.get_variable_scope().name + if scope_name: + whole_name = scope_name + '/' + name + else: + whole_name = name + + def _PS(X, r, n_out_channels): + if n_out_channels >= 1: + assert int(X.get_shape()[-1]) == (r**2) * n_out_channels, _err_log + """ + bsize, a, b, c = X.get_shape().as_list() + bsize = tf.shape(X)[0] # Handling Dimension(None) type for undefined batch dim + Xs=tf.split(X,r,3) #b*h*w*r*r + Xr=tf.concat(Xs,2) #b*h*(r*w)*r + X=tf.reshape(Xr,(bsize,r*a,r*b,n_out_channel)) # b*(r*h)*(r*w)*c + + """ + X = tf.depth_to_space(X, r) + else: + logging.info(_err_log) + return X + + inputs = net.outputs + + if n_out_channel is None: + assert int(inputs.get_shape()[-1]) / (scale**2) % 1 == 0, _err_log + n_out_channel = int(int(inputs.get_shape()[-1]) / (scale**2)) + + logging.info("SubpixelConv2d %s: scale: %d n_out_channel: %s act: %s" % (name, scale, n_out_channel, act.__name__)) + + net_new = Layer(inputs, name=whole_name) + # with tf.name_scope(name): + with tf.variable_scope(name) as vs: + net_new.outputs = act(_PS(inputs, r=scale, n_out_channels=n_out_channel)) + + net_new.all_layers = list(net.all_layers) + net_new.all_params = list(net.all_params) + net_new.all_drop = dict(net.all_drop) + net_new.all_layers.extend([net_new.outputs]) + return net_new + + +def subpixel_conv1d(net, scale=2, act=tf.identity, name='subpixel_conv1d'): + """It is a 1D sub-pixel up-sampling layer. + + Calls a TensorFlow function that directly implements this functionality. + We assume input has dim (batch, width, r) + + Parameters + ------------ + net : :class:`Layer` + Previous layer with output shape of (batch, width, r). + scale : int + The up-scaling ratio, a wrong setting will lead to Dimension size error. + act : activation function + The activation function of this layer. + name : str + A unique layer name. + + Returns + ------- + :class:`Layer` + A 1D sub-pixel up-sampling layer + + Examples + ---------- + >>> t_signal = tf.placeholder('float32', [10, 100, 4], name='x') + >>> n = InputLayer(t_signal, name='in') + >>> n = SubpixelConv1d(n, scale=2, name='s') + >>> print(n.outputs.shape) + ... (10, 200, 2) + + References + ----------- + `Audio Super Resolution Implementation `__. 
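+
+    Notes
+    -----
+    A sketch of what the implementation below does (a description of the code, not an API guarantee):
+    the input of shape (batch, width, channels) is transposed to (channels, width, batch),
+    ``tf.batch_to_space_nd`` with block size ``[scale]`` folds a factor of ``scale`` from the
+    leading dimension into the width axis, and a final transpose yields
+    (batch, scale * width, channels / scale).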
+ + """ + + def _PS(I, r): + X = tf.transpose(I, [2, 1, 0]) # (r, w, b) + X = tf.batch_to_space_nd(X, [r], [[0, 0]]) # (1, r*w, b) + X = tf.transpose(X, [2, 1, 0]) + return X + + logging.info("SubpixelConv1d %s: scale: %d act: %s" % (name, scale, act.__name__)) + + inputs = net.outputs + net_new = Layer(inputs, name=name) + with tf.name_scope(name): + net_new.outputs = act(_PS(inputs, r=scale)) + + net_new.all_layers = list(net.all_layers) + net_new.all_params = list(net.all_params) + net_new.all_drop = dict(net.all_drop) + net_new.all_layers.extend([net_new.outputs]) + return net_new + + +# Alias +SubpixelConv2d = subpixel_conv2d +SubpixelConv1d = subpixel_conv1d diff --git a/tensorlayer/layers/time_distribution.py b/tensorlayer/layers/time_distribution.py new file mode 100644 index 00000000..943259b6 --- /dev/null +++ b/tensorlayer/layers/time_distribution.py @@ -0,0 +1,78 @@ +# -*- coding: utf-8 -*- + +from .core import * + + +class TimeDistributedLayer(Layer): + """ + The :class:`TimeDistributedLayer` class that applies a function to every timestep of the input tensor. + For example, if use :class:`DenseLayer` as the `layer_class`, we input (batch_size, length, dim) and + output (batch_size , length, new_dim). + + Parameters + ---------- + layer : :class:`Layer` + Previous layer with output size of (batch_size, length, dim). + layer_class : a :class:`Layer` class + The layer class name. + args : dictionary + The arguments for the ``layer_class``. + name : str + A unique layer name. + + Examples + -------- + >>> batch_size = 32 + >>> timestep = 20 + >>> input_dim = 100 + >>> x = tf.placeholder(dtype=tf.float32, shape=[batch_size, timestep, input_dim], name="encode_seqs") + >>> net = InputLayer(x, name='input') + >>> net = TimeDistributedLayer(net, layer_class=DenseLayer, args={'n_units':50, 'name':'dense'}, name='time_dense') + ... [TL] InputLayer input: (32, 20, 100) + ... [TL] TimeDistributedLayer time_dense: layer_class:DenseLayer + >>> print(net.outputs._shape) + ... (32, 20, 50) + >>> net.print_params(False) + ... param 0: (100, 50) time_dense/dense/W:0 + ... param 1: (50,) time_dense/dense/b:0 + ... num of params: 5050 + + """ + + def __init__( + self, + layer, + layer_class=None, + args={}, + name='time_distributed', + ): + Layer.__init__(self, name=name) + self.inputs = layer.outputs + logging.info("TimeDistributedLayer %s: layer_class:%s args:%s" % (self.name, layer_class.__name__, args)) + + if not args: args = dict() + assert isinstance(args, dict), "'args' must be a dict." 
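+
+        # if the input is a list of per-time-step tensors rather than a single tensor,
+        # stack it and transpose to batch-major shape (batch_size, timestep, dim)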
+ + if not isinstance(self.inputs, tf.Tensor): + self.inputs = tf.transpose(tf.stack(self.inputs), [1, 0, 2]) + + input_shape = self.inputs.get_shape() + + timestep = input_shape[1] + x = tf.unstack(self.inputs, axis=1) + + for i in range(0, timestep): + with tf.variable_scope(name, reuse=(set_keep['name_reuse'] if i == 0 else True)) as vs: + set_name_reuse((set_keep['name_reuse'] if i == 0 else True)) + net = layer_class(InputLayer(x[i], name=args['name'] + str(i)), **args) + # net = layer_class(InputLayer(x[i], name="input_"+args['name']), **args) + x[i] = net.outputs + variables = tf.get_collection(TF_GRAPHKEYS_VARIABLES, scope=vs.name) + + self.outputs = tf.stack(x, axis=1, name=name) + + self.all_layers = list(layer.all_layers) + self.all_params = list(layer.all_params) + self.all_drop = dict(layer.all_drop) + self.all_layers.extend([self.outputs]) + self.all_params.extend(variables) diff --git a/tensorlayer/nlp.py b/tensorlayer/nlp.py index fe1273bd..2ddfc7bb 100644 --- a/tensorlayer/nlp.py +++ b/tensorlayer/nlp.py @@ -1,51 +1,54 @@ -#! /usr/bin/python # -*- coding: utf-8 -*- -import tensorflow as tf -import os -import re -from sys import platform as _platform import collections +import os import random -import numpy as np +import re +# Metric +import subprocess +import tempfile import warnings -from six.moves import xrange + +import numpy as np +import tensorflow as tf +from six.moves import urllib, xrange from tensorflow.python.platform import gfile -import re -# Iteration functions +from . import _logging as logging def generate_skip_gram_batch(data, batch_size, num_skips, skip_window, data_index=0): """Generate a training batch for the Skip-Gram model. + See `Word2Vec example `__. + Parameters ---------- - data : a list - To present context. - batch_size : an int + data : list of data + To present context, usually a list of integers. + batch_size : int Batch size to return. - num_skips : an int + num_skips : int How many times to reuse an input to generate a label. - skip_window : an int + skip_window : int How many words to consider left and right. - data_index : an int - Index of the context location. - without using yield, this code use data_index to instead. + data_index : int + Index of the context location. This code use `data_index` to instead of yield like ``tl.iterate``. Returns - -------- - batch : a list - Inputs - labels : a list + ------- + batch : list of data + Inputs. + labels : list of data Labels - data_index : an int + data_index : int Index of the context location. Examples -------- - - Setting num_skips=2, skip_window=1, use the right and left words. - In the same way, num_skips=4, skip_window=2 means use the nearby 4 words. + Setting num_skips=2, skip_window=1, use the right and left words. + In the same way, num_skips=4, skip_window=2 means use the nearby 4 words. + >>> data = [1,2,3,4,5,6,7,8,9,10,11] >>> batch, labels, data_index = tl.nlp.generate_skip_gram_batch(data=data, batch_size=8, num_skips=2, skip_window=1, data_index=0) >>> print(batch) @@ -60,9 +63,6 @@ def generate_skip_gram_batch(data, batch_size, num_skips, skip_window, data_inde ... [4] ... [6]] - References - ----------- - - `TensorFlow word2vec tutorial `_ """ # global data_index # you can put data_index outside the function, then # modify the global data_index in the function without return it. 
@@ -90,27 +90,25 @@ def generate_skip_gram_batch(data, batch_size, num_skips, skip_window, data_inde
     return batch, labels, data_index
 
 
-# Sampling functions
 def sample(a=[], temperature=1.0):
     """Sample an index from a probability array.
 
     Parameters
     ----------
-    a : a list
+    a : list of float
        List of probabilities.
     temperature : float or None
-        The higher the more uniform.\n
-        When a = [0.1, 0.2, 0.7],\n
-        temperature = 0.7, the distribution will be sharpen [ 0.05048273  0.13588945  0.81362782]\n
-        temperature = 1.0, the distribution will be the same [0.1    0.2    0.7]\n
-        temperature = 1.5, the distribution will be filtered [ 0.16008435  0.25411807  0.58579758]\n
-        If None, it will be ``np.argmax(a)``
+        The higher, the more uniform. When a = [0.1, 0.2, 0.7],
+            - temperature = 0.7, the distribution will be sharpened [0.05048273, 0.13588945, 0.81362782]
+            - temperature = 1.0, the distribution will be the same [0.1, 0.2, 0.7]
+            - temperature = 1.5, the distribution will be flattened [0.16008435, 0.25411807, 0.58579758]
+            - If None, it will be ``np.argmax(a)``
 
     Notes
    ------
-    - No matter what is the temperature and input list, the sum of all probabilities will be one.
-    Even if input list = [1, 100, 200], the sum of all probabilities will still be one.
-    - For large vocabulary_size, choice a higher temperature to avoid error.
+    - No matter what the temperature and input list are, the sum of all probabilities will be one. Even if input list = [1, 100, 200], the sum of all probabilities will still be one.
+    - For a large vocabulary size, choose a higher temperature or use ``tl.nlp.sample_top`` to avoid errors.
+
    """
    b = np.copy(a)
    try:
@@ -124,16 +122,17 @@ def sample(a=[], temperature=1.0):
        return np.argmax(np.random.multinomial(1, a, 1))
    except:
        # np.set_printoptions(threshold=np.nan)
-        # print(a)
-        # print(np.sum(a))
-        # print(np.max(a))
-        # print(np.min(a))
+        # logging.info(a)
+        # logging.info(np.sum(a))
+        # logging.info(np.max(a))
+        # logging.info(np.min(a))
        # exit()
        message = "For large vocabulary_size, choice a higher temperature\
                  to avoid log error. Hint : use ``sample_top``. "
+
        warnings.warn(message, Warning)
-        # print(a)
-        # print(b)
+        # logging.info(a)
+        # logging.info(b)
        return np.argmax(np.random.multinomial(1, b, 1))
 
 
@@ -142,14 +141,15 @@ def sample_top(a=[], top_k=10):
 
    Parameters
    ----------
-    a : a list
+    a : list of float
        List of probabilities.
    top_k : int
        Number of candidates to be considered.
+
    """
    idx = np.argpartition(a, -top_k)[-top_k:]
    probs = a[idx]
-    # print("new", probs)
+    # logging.info("new %f" % probs)
    probs = probs / np.sum(probs)
    choice = np.random.choice(idx, p=probs)
    return choice
@@ -159,7 +159,7 @@ def sample_top(a=[], top_k=10):
    #     idx = idx[:top_k]
    #     # a = a[idx]
    #     probs = a[idx]
-    #     print("prev", probs)
+    #     logging.info("prev %f" % probs)
    #     # probs = probs / np.sum(probs)
    #     # choice = np.random.choice(idx, p=probs)
    #     # return choice
@@ -171,13 +171,15 @@ class SimpleVocabulary(object):
 
    Parameters
    ------------
-    vocab : A dictionary of word to word_id.
-    unk_id : Id of the special 'unknown' word.
+    vocab : dictionary
+        A dictionary that maps word to ID.
+    unk_id : int
+        The ID for the 'unknown' word.
+
    """
 
    def __init__(self, vocab, unk_id):
-        """Initializes the vocabulary."""
-
+        """Initialize the vocabulary."""
        self._vocab = vocab
        self._unk_id = unk_id
 
@@ -190,30 +192,39 @@ def word_to_id(self, word):
 
 
 class Vocabulary(object):
-    """Create Vocabulary class from a given vocabulary and its id-word, word-id convert,
-    see create_vocab() and ``tutorial_tfrecord3.py``.
+ """Create Vocabulary class from a given vocabulary and its id-word, word-id convert. + See create_vocab() and ``tutorial_tfrecord3.py``. Parameters ----------- - vocab_file : File containing the vocabulary, where the words are the first - whitespace-separated token on each line (other tokens are ignored) and - the word ids are the corresponding line numbers. - start_word : Special word denoting sentence start. - end_word : Special word denoting sentence end. - unk_word : Special word denoting unknown words. + vocab_file : str + The file contains the vocabulary (can be created via ``tl.nlp.create_vocab``), where the words are the first whitespace-separated token on each line (other tokens are ignored) and the word ids are the corresponding line numbers. + start_word : str + Special word denoting sentence start. + end_word : str + Special word denoting sentence end. + unk_word : str + Special word denoting unknown words. Attributes ------------ - vocab : a dictionary from word to id. - reverse_vocab : a list from id to word. - start_id : int of start id - end_id : int of end id - unk_id : int of unk id - pad_id : int of padding id - - Vocab_files + vocab : dictionary + A dictionary that maps word to ID. + reverse_vocab : list of int + A list that maps ID to word. + start_id : int + For start ID. + end_id : int + For end ID. + unk_id : int + For unknown ID. + pad_id : int + For Padding ID. + + Examples ------------- - >>> Look as follow, includes `start_word` , `end_word` but no `unk_word` . + The vocab file looks like follow, includes `start_word` , `end_word` ... + >>> a 969108 >>> 586368 >>> 586368 @@ -225,17 +236,13 @@ class Vocabulary(object): >>> with 152984 >>> and 139109 >>> is 97322 + """ - def __init__(self, - vocab_file, - start_word="", - end_word="", - unk_word="", - pad_word=""): + def __init__(self, vocab_file, start_word="", end_word="", unk_word="", pad_word=""): if not tf.gfile.Exists(vocab_file): - tf.logging.fatal("Vocab file %s not found.", vocab_file) - tf.logging.info("Initializing vocabulary from file: %s", vocab_file) + tf.logging.fatal("Vocab file %s not found." 
% vocab_file)
+        tf.logging.info("Initializing vocabulary from file: %s" % vocab_file)
 
        with tf.gfile.GFile(vocab_file, mode="r") as f:
            reverse_vocab = list(f.readlines())
 
@@ -253,8 +260,8 @@ def __init__(self,
 
        vocab = dict([(x, y) for (y, x) in enumerate(reverse_vocab)])
 
-        print("  [TL] Vocabulary from %s : %s %s %s" % (vocab_file, start_word, end_word, unk_word))
-        print("    vocabulary with %d words (includes start_word, end_word, unk_word)" % len(vocab))
+        logging.info("Vocabulary from %s : %s %s %s" % (vocab_file, start_word, end_word, unk_word))
+        logging.info("    vocabulary with %d words (includes start_word, end_word, unk_word)" % len(vocab))
        # tf.logging.info("  vocabulary with %d words" % len(vocab))
 
        self.vocab = vocab  # vocab[word] = id
@@ -265,10 +272,10 @@ def __init__(self,
        self.end_id = vocab[end_word]
        self.unk_id = vocab[unk_word]
        self.pad_id = vocab[pad_word]
-        print("      start_id: %d" % self.start_id)
-        print("      end_id: %d" % self.end_id)
-        print("      unk_id: %d" % self.unk_id)
-        print("      pad_id: %d" % self.pad_id)
+        logging.info("    start_id: %d" % self.start_id)
+        logging.info("    end_id  : %d" % self.end_id)
+        logging.info("    unk_id  : %d" % self.unk_id)
+        logging.info("    pad_id  : %d" % self.pad_id)
 
    def word_to_id(self, word):
        """Returns the integer word id of a word string."""
@@ -286,18 +293,22 @@ def id_to_word(self, word_id):
 
 
 def process_sentence(sentence, start_word="", end_word=""):
-    """Converts a sentence string into a list of string words, add start_word and end_word,
+    """Separate a sentence string into a list of string words, and add start_word and end_word,
    see ``create_vocab()`` and ``tutorial_tfrecord3.py``.
 
-    Parameter
-    ---------
-    sentence : a sentence in string.
-    start_word : a string or None, if None, non start word will be appended.
-    end_word : a string or None, if None, non end word will be appended.
+    Parameters
+    ----------
+    sentence : str
+        A sentence.
+    start_word : str or None
+        The start word. If None, no start word will be appended.
+    end_word : str or None
+        The end word. If None, no end word will be appended.
 
    Returns
    ---------
-    A list of strings; the processed caption.
+    list of str
+        A list of strings; the sentence separated into words.
 
    Examples
    -----------
@@ -309,8 +320,9 @@ def process_sentence(sentence, start_word="", end_word=""):
    Notes
    -------
    - You have to install the following package.
-    - `Installing NLTK `_
-    - `Installing NLTK data `_
+    - `Installing NLTK `__
+    - `Installing NLTK data `__
+
    """
    try:
        import nltk
@@ -327,29 +339,31 @@ def process_sentence(sentence, start_word="", end_word=""):
 
 
 def create_vocab(sentences, word_counts_output_file, min_word_count=1):
-    """Creates the vocabulary of word to word_id, see create_vocab() and ``tutorial_tfrecord3.py``.
+    """Creates the vocabulary of word to word_id.
+
+    See ``tutorial_tfrecord3.py``.
 
    The vocabulary is saved to disk in a text file of word counts. The id
    of each word in the file is its corresponding 0-based line number.
 
    Parameters
    ------------
-    sentences : a list of lists of strings.
-    word_counts_output_file : A string
+    sentences : list of list of str
+        All sentences for creating the vocabulary.
+    word_counts_output_file : str
        The file name.
-    min_word_count : a int
+    min_word_count : int
        Minimum number of occurrences for a word.
 
    Returns
    --------
-    - tl.nlp.SimpleVocabulary object.
-
-    Notes
-    -------
-    - See more ``tl.nlp.build_vocab()``
+    :class:`SimpleVocabulary`
+        The simple vocabulary object, see :class:`Vocabulary` for more.
Examples -------- + Pre-process sentences + >>> captions = ["one two , three", "four five five"] >>> processed_capts = [] >>> for c in captions: @@ -358,11 +372,16 @@ def create_vocab(sentences, word_counts_output_file, min_word_count=1): >>> print(processed_capts) ...[['', 'one', 'two', ',', 'three', ''], ['', 'four', 'five', 'five', '']] + Create vocabulary + >>> tl.nlp.create_vocab(processed_capts, word_counts_output_file='vocab.txt', min_word_count=1) - ... [TL] Creating vocabulary. + ... Creating vocabulary. ... Total words: 8 ... Words in vocabulary: 8 ... Wrote vocabulary file: vocab.txt + + Get vocabulary object + >>> vocab = tl.nlp.Vocabulary('vocab.txt', start_word="", end_word="", unk_word="") ... INFO:tensorflow:Initializing vocabulary from file: vocab.txt ... [TL] Vocabulary from vocab.txt : @@ -371,26 +390,27 @@ def create_vocab(sentences, word_counts_output_file, min_word_count=1): ... end_id: 3 ... unk_id: 9 ... pad_id: 0 + """ from collections import Counter - print(" [TL] Creating vocabulary.") + logging.info("Creating vocabulary.") counter = Counter() for c in sentences: counter.update(c) - # print('c',c) - print(" Total words: %d" % len(counter)) + # logging.info('c',c) + logging.info(" Total words: %d" % len(counter)) # Filter uncommon words and sort by descending count. word_counts = [x for x in counter.items() if x[1] >= min_word_count] word_counts.sort(key=lambda x: x[1], reverse=True) word_counts = [("", 0)] + word_counts # 1st id should be reserved for padding - # print(word_counts) - print(" Words in vocabulary: %d" % len(word_counts)) + # logging.info(word_counts) + logging.info(" Words in vocabulary: %d" % len(word_counts)) # Write out the word counts file. with tf.gfile.FastGFile(word_counts_output_file, "w") as f: f.write("\n".join(["%s %d" % (w, c) for w, c in word_counts])) - print(" Wrote vocabulary file: %s" % word_counts_output_file) + logging.info(" Wrote vocabulary file: %s" % word_counts_output_file) # Create the vocabulary dictionary. reverse_vocab = [x[0] for x in word_counts] @@ -407,12 +427,14 @@ def simple_read_words(filename="nietzsche.txt"): Parameters ---------- - filename : a string + filename : str A file path (like .txt file) Returns -------- - The context in a string + str + The context in a string. + """ with open(filename, "r") as f: words = f.read() @@ -420,27 +442,29 @@ def simple_read_words(filename="nietzsche.txt"): def read_words(filename="nietzsche.txt", replace=['\n', '']): - """ File to list format context. Note that, this script can not handle punctuations. + """Read list format context from a file. + For customized read_words method, see ``tutorial_generate_text.py``. Parameters - ----------- - filename : a string - A file path (like .txt file) - replace : a list - [original string, target string], to disable replace use ['', ''] + ---------- + filename : str + a file path. + replace : list of str + replace original string by target string. Returns + ------- + list of str + The context in a list (split using space). + + See Also -------- - The context in a list, split by space by default, and use ```` to represent ``\\n``, - e.g. ``[... 'how', 'useful', 'it', "'s" ... ]``. 
+ - `tensorflow.models.rnn.ptb.reader `__ - References - --------------- - - `tensorflow.models.rnn.ptb.reader `_ """ with tf.gfile.GFile(filename, "r") as f: - try: # python 3.4 or older + try: # python 3.4 or older context_list = f.read().replace(*replace).split() except: # python 3.5 f.seek(0) @@ -454,20 +478,20 @@ def read_analogies_file(eval_file='questions-words.txt', word2id={}): Parameters ---------- - eval_data : a string + eval_file : str The file name. - word2id : a dictionary - Mapping words to unique IDs. + word2id : dictionary + a dictionary that maps word to ID. Returns -------- - analogy_questions : a [n, 4] numpy array containing the analogy question's - word ids. - questions_skipped: questions skipped due to unknown words. + numpy.array + A ``[n_examples, 4]`` numpy array containing the analogy question's word IDs. Examples --------- - >>> eval_file should be in this format : + The file should be in this format + >>> : capital-common-countries >>> Athens Greece Baghdad Iraq >>> Athens Greece Bangkok Thailand @@ -478,13 +502,12 @@ def read_analogies_file(eval_file='questions-words.txt', word2id={}): >>> Athens Greece Canberra Australia >>> Athens Greece Hanoi Vietnam >>> Athens Greece Havana Cuba - ... + + Get the tokenized analogy question data >>> words = tl.files.load_matt_mahoney_text8_dataset() - >>> data, count, dictionary, reverse_dictionary = \ - tl.nlp.build_words_dataset(words, vocabulary_size, True) - >>> analogy_questions = tl.nlp.read_analogies_file( \ - eval_file='questions-words.txt', word2id=dictionary) + >>> data, count, dictionary, reverse_dictionary = tl.nlp.build_words_dataset(words, vocabulary_size, True) + >>> analogy_questions = tl.nlp.read_analogies_file(eval_file='questions-words.txt', word2id=dictionary) >>> print(analogy_questions) ... [[ 3068 1248 7161 1581] ... [ 3068 1248 28683 5642] @@ -493,6 +516,7 @@ def read_analogies_file(eval_file='questions-words.txt', word2id={}): ... [ 1216 4309 19982 25506] ... [ 1216 4309 3194 8650] ... [ 1216 4309 140 312]] + """ questions = [] questions_skipped = 0 @@ -506,28 +530,29 @@ def read_analogies_file(eval_file='questions-words.txt', word2id={}): questions_skipped += 1 else: questions.append(np.array(ids)) - print("Eval analogy file: ", eval_file) - print("Questions: ", len(questions)) - print("Skipped: ", questions_skipped) + logging.info("Eval analogy file: %s" % eval_file) + logging.info("Questions: %d", len(questions)) + logging.info("Skipped: %d", questions_skipped) analogy_questions = np.array(questions, dtype=np.int32) return analogy_questions def build_vocab(data): """Build vocabulary. + Given the context in list format. Return the vocabulary, which is a dictionary for word to id. e.g. {'campbell': 2587, 'atlantic': 2247, 'aoun': 6746 .... } Parameters ---------- - data : a list of string - the context in list format + data : list of str + The context in list format Returns -------- - word_to_id : a dictionary - mapping words to unique IDs. e.g. {'campbell': 2587, 'atlantic': 2247, 'aoun': 6746 .... } + dictionary + that maps word to unique ID. e.g. {'campbell': 2587, 'atlantic': 2247, 'aoun': 6746 .... 
} References --------------- @@ -538,32 +563,34 @@ def build_vocab(data): >>> data_path = os.getcwd() + '/simple-examples/data' >>> train_path = os.path.join(data_path, "ptb.train.txt") >>> word_to_id = build_vocab(read_txt_words(train_path)) + """ # data = _read_words(filename) counter = collections.Counter(data) - # print('counter', counter) # dictionary for the occurrence number of each word, e.g. 'banknote': 1, 'photography': 1, 'kia': 1 + # logging.info('counter %s' % counter) # dictionary for the occurrence number of each word, e.g. 'banknote': 1, 'photography': 1, 'kia': 1 count_pairs = sorted(counter.items(), key=lambda x: (-x[1], x[0])) - # print('count_pairs',count_pairs) # convert dictionary to list of tuple, e.g. ('ssangyong', 1), ('swapo', 1), ('wachter', 1) + # logging.info('count_pairs %s' % count_pairs) # convert dictionary to list of tuple, e.g. ('ssangyong', 1), ('swapo', 1), ('wachter', 1) words, _ = list(zip(*count_pairs)) word_to_id = dict(zip(words, range(len(words)))) - # print(words) # list of words - # print(word_to_id) # dictionary for word to id, e.g. 'campbell': 2587, 'atlantic': 2247, 'aoun': 6746 + # logging.info(words) # list of words + # logging.info(word_to_id) # dictionary for word to id, e.g. 'campbell': 2587, 'atlantic': 2247, 'aoun': 6746 return word_to_id def build_reverse_dictionary(word_to_id): - """Given a dictionary for converting word to integer id. - Returns a reverse dictionary for converting a id to word. + """Given a dictionary that maps word to integer id. + Returns a reverse dictionary that maps a id to word. Parameters ---------- word_to_id : dictionary - mapping words to unique ids + that maps word to ID. Returns -------- - reverse_dictionary : a dictionary - mapping ids to words + dictionary + A dictionary that maps IDs to words. + """ reverse_dictionary = dict(zip(word_to_id.values(), word_to_id.keys())) return reverse_dictionary @@ -575,29 +602,28 @@ def build_words_dataset(words=[], vocabulary_size=50000, printable=True, unk_key Parameters ---------- - words : a list of string or byte - The context in list format. You may need to do preprocessing on the words, - such as lower case, remove marks etc. - vocabulary_size : an int - The maximum vocabulary size, limiting the vocabulary size. - Then the script replaces rare words with 'UNK' token. + words : list of str or byte + The context in list format. You may need to do preprocessing on the words, such as lower case, remove marks etc. + vocabulary_size : int + The maximum vocabulary size, limiting the vocabulary size. Then the script replaces rare words with 'UNK' token. printable : boolean Whether to print the read vocabulary size of the given words. - unk_key : a string - Unknown words = unk_key + unk_key : str + Represent the unknown words. Returns -------- - data : a list of integer - The context in a list of ids - count : a list of tuple and list - count[0] is a list : the number of rare words\n - count[1:] are tuples : the number of occurrence of each word\n - e.g. [['UNK', 418391], (b'the', 1061396), (b'of', 593677), (b'and', 416629), (b'one', 411764)] - dictionary : a dictionary - word_to_id, mapping words to unique IDs. + data : list of int + The context in a list of ID. + count : list of tuple and list + Pair words and IDs. + - count[0] is a list : the number of rare words + - count[1:] are tuples : the number of occurrence of each word + - e.g. 
[['UNK', 418391], (b'the', 1061396), (b'of', 593677), (b'and', 416629), (b'one', 411764)] + dictionary : dictionary + It is `word_to_id` that maps word to ID. reverse_dictionary : a dictionary - id_to_word, mapping id to unique word. + It is `id_to_word` that maps ID to word. Examples -------- @@ -607,7 +633,8 @@ def build_words_dataset(words=[], vocabulary_size=50000, printable=True, unk_key References ----------------- - - `tensorflow/examples/tutorials/word2vec/word2vec_basic.py `_ + - `tensorflow/examples/tutorials/word2vec/word2vec_basic.py `__ + """ import collections count = [[unk_key, -1]] @@ -627,36 +654,35 @@ def build_words_dataset(words=[], vocabulary_size=50000, printable=True, unk_key count[0][1] = unk_count reverse_dictionary = dict(zip(dictionary.values(), dictionary.keys())) if printable: - print('Real vocabulary size %d' % len(collections.Counter(words).keys())) - print('Limited vocabulary size {}'.format(vocabulary_size)) + logging.info('Real vocabulary size %d' % len(collections.Counter(words).keys())) + logging.info('Limited vocabulary size {}'.format(vocabulary_size)) assert len(collections.Counter(words).keys()) >= vocabulary_size, \ "the limited vocabulary_size must be less than or equal to the read vocabulary_size" return data, count, dictionary, reverse_dictionary def words_to_word_ids(data=[], word_to_id={}, unk_key='UNK'): - """Given a context (words) in list format and the vocabulary, - Returns a list of IDs to represent the context. + """Convert a list of string (words) to IDs. Parameters ---------- - data : a list of string or byte - the context in list format + data : list of string or byte + The context in list format word_to_id : a dictionary - mapping words to unique IDs. - unk_key : a string - Unknown words = unk_key + that maps word to ID. + unk_key : str + Represent the unknown words. Returns -------- - A list of IDs to represent the context. + list of int + A list of IDs to represent the context. Examples -------- >>> words = tl.files.load_matt_mahoney_text8_dataset() >>> vocabulary_size = 50000 - >>> data, count, dictionary, reverse_dictionary = \ - ... 
tl.nlp.build_words_dataset(words, vocabulary_size, True) + >>> data, count, dictionary, reverse_dictionary = tl.nlp.build_words_dataset(words, vocabulary_size, True) >>> context = [b'hello', b'how', b'are', b'you'] >>> ids = tl.nlp.words_to_word_ids(words, dictionary) >>> context = tl.nlp.word_ids_to_words(ids, reverse_dictionary) @@ -667,13 +693,14 @@ def words_to_word_ids(data=[], word_to_id={}, unk_key='UNK'): References --------------- - - `tensorflow.models.rnn.ptb.reader `_ + - `tensorflow.models.rnn.ptb.reader `__ + """ # if isinstance(data[0], six.string_types): - # print(type(data[0])) + # logging.info(type(data[0])) # # exit() - # print(data[0]) - # print(word_to_id) + # logging.info(data[0]) + # logging.info(word_to_id) # return [word_to_id[str(word)] for word in data] # else: @@ -687,32 +714,33 @@ def words_to_word_ids(data=[], word_to_id={}, unk_key='UNK'): # return [word_to_id[word] for word in data] # this one # if isinstance(data[0], str): - # # print('is a string object') + # # logging.info('is a string object') # return [word_to_id[word] for word in data] # else:#if isinstance(s, bytes): - # # print('is a unicode object') - # # print(data[0]) + # # logging.info('is a unicode object') + # # logging.info(data[0]) # return [word_to_id[str(word)] f def word_ids_to_words(data, id_to_word): - """Given a context (ids) in list format and the vocabulary, - Returns a list of words to represent the context. + """Convert a list of integer to strings (words). Parameters ---------- - data : a list of integer - the context in list format - id_to_word : a dictionary - mapping id to unique word. + data : list of int + The context in list format. + id_to_word : dictionary + a dictionary that maps ID to word. Returns -------- - A list of string or byte to represent the context. + list of str + A list of string or byte to represent the context. Examples --------- - >>> see words_to_word_ids + >>> see ``tl.nlp.words_to_word_ids`` + """ return [id_to_word[i] for i in data] @@ -723,16 +751,15 @@ def save_vocab(count=[], name='vocab.txt'): Parameters ---------- count : a list of tuple and list - count[0] is a list : the number of rare words\n - count[1:] are tuples : the number of occurrence of each word\n + count[0] is a list : the number of rare words, + count[1:] are tuples : the number of occurrence of each word, e.g. [['UNK', 418391], (b'the', 1061396), (b'of', 593677), (b'and', 416629), (b'one', 411764)] Examples --------- >>> words = tl.files.load_matt_mahoney_text8_dataset() >>> vocabulary_size = 50000 - >>> data, count, dictionary, reverse_dictionary = \ - ... tl.nlp.build_words_dataset(words, vocabulary_size, True) + >>> data, count, dictionary, reverse_dictionary = tl.nlp.build_words_dataset(words, vocabulary_size, True) >>> tl.nlp.save_vocab(count, name='vocab_text8.txt') >>> vocab_text8.txt ... UNK 418391 @@ -743,13 +770,15 @@ def save_vocab(count=[], name='vocab.txt'): ... in 372201 ... a 325873 ... 
to 316376 + """ pwd = os.getcwd() vocabulary_size = len(count) with open(os.path.join(pwd, name), "w") as f: for i in xrange(vocabulary_size): f.write("%s %d\n" % (tf.compat.as_text(count[i][0]), count[i][1])) - print("%d vocab saved to %s in %s" % (vocabulary_size, name, pwd)) + logging.info("%d vocab saved to %s in %s" % (vocabulary_size, name, pwd)) + # Functions for translation @@ -771,7 +800,7 @@ def basic_tokenizer(sentence, _WORD_SPLIT=re.compile(b"([.,!?\"':;)(])")): >>> with gfile.GFile(train_path + ".en", mode="rb") as f: >>> for line in f: >>> tokens = tl.nlp.basic_tokenizer(line) - >>> print(tokens) + >>> logging.info(tokens) >>> exit() ... [b'Changing', b'Lives', b'|', b'Changing', b'Society', b'|', b'How', ... b'It', b'Works', b'|', b'Technology', b'Drives', b'Change', b'Home', @@ -782,6 +811,7 @@ def basic_tokenizer(sentence, _WORD_SPLIT=re.compile(b"([.,!?\"':;)(])")): References ---------- - Code from ``/tensorflow/models/rnn/translation/data_utils.py`` + """ words = [] sentence = tf.compat.as_bytes(sentence) @@ -790,8 +820,11 @@ def basic_tokenizer(sentence, _WORD_SPLIT=re.compile(b"([.,!?\"':;)(])")): return [w for w in words if w] -def create_vocabulary(vocabulary_path, data_path, max_vocabulary_size, - tokenizer=None, normalize_digits=True, +def create_vocabulary(vocabulary_path, + data_path, + max_vocabulary_size, + tokenizer=None, + normalize_digits=True, _DIGIT_RE=re.compile(br"\d"), _START_VOCAB=[b"_PAD", b"_GO", b"_EOS", b"_UNK"]): """Create vocabulary file (if it does not exist yet) from data file. @@ -804,27 +837,31 @@ def create_vocabulary(vocabulary_path, data_path, max_vocabulary_size, Parameters ----------- - vocabulary_path : path where the vocabulary will be created. - data_path : data file that will be used to create vocabulary. - max_vocabulary_size : limit on the size of the created vocabulary. - tokenizer : a function to use to tokenize each data sentence. - if None, basic_tokenizer will be used. - normalize_digits : Boolean - if true, all digits are replaced by 0s. + vocabulary_path : str + Path where the vocabulary will be created. + data_path : str + Data file that will be used to create vocabulary. + max_vocabulary_size : int + Limit on the size of the created vocabulary. + tokenizer : function + A function to use to tokenize each data sentence. If None, basic_tokenizer will be used. + normalize_digits : boolean + If true, all digits are replaced by `0`. References ---------- - Code from ``/tensorflow/models/rnn/translation/data_utils.py`` + """ if not gfile.Exists(vocabulary_path): - print("Creating vocabulary %s from data %s" % (vocabulary_path, data_path)) + logging.info("Creating vocabulary %s from data %s" % (vocabulary_path, data_path)) vocab = {} with gfile.GFile(data_path, mode="rb") as f: counter = 0 for line in f: counter += 1 if counter % 100000 == 0: - print(" processing line %d" % counter) + logging.info(" processing line %d" % counter) tokens = tokenizer(line) if tokenizer else basic_tokenizer(line) for w in tokens: word = re.sub(_DIGIT_RE, b"0", w) if normalize_digits else w @@ -839,29 +876,26 @@ def create_vocabulary(vocabulary_path, data_path, max_vocabulary_size, for w in vocab_list: vocab_file.write(w + b"\n") else: - print("Vocabulary %s from data %s exists" % (vocabulary_path, data_path)) + logging.info("Vocabulary %s from data %s exists" % (vocabulary_path, data_path)) def initialize_vocabulary(vocabulary_path): - """Initialize vocabulary from file, return the word_to_id (dictionary) - and id_to_word (list). 
+ """Initialize vocabulary from file, return the `word_to_id` (dictionary) + and `id_to_word` (list). - We assume the vocabulary is stored one-item-per-line, so a file:\n - dog\n - cat\n - will result in a vocabulary {"dog": 0, "cat": 1}, and this function will - also return the reversed-vocabulary ["dog", "cat"]. + We assume the vocabulary is stored one-item-per-line, so a file will result in a vocabulary {"dog": 0, "cat": 1}, and this function will also return the reversed-vocabulary ["dog", "cat"]. Parameters ----------- - vocabulary_path : path to the file containing the vocabulary. + vocabulary_path : str + Path to the file containing the vocabulary. Returns -------- - vocab : a dictionary - Word to id. A dictionary mapping string to integers. - rev_vocab : a list - Id to word. The reversed vocabulary (a list, which reverses the vocabulary mapping). + vocab : dictionary + a dictionary that maps word to ID. + rev_vocab : list of int + a list that maps ID to word. Examples --------- @@ -878,6 +912,7 @@ def initialize_vocabulary(vocabulary_path): Raises ------- ValueError : if the provided vocabulary_path does not exist. + """ if gfile.Exists(vocabulary_path): rev_vocab = [] @@ -890,9 +925,7 @@ def initialize_vocabulary(vocabulary_path): raise ValueError("Vocabulary file %s not found.", vocabulary_path) -def sentence_to_token_ids(sentence, vocabulary, - tokenizer=None, normalize_digits=True, - UNK_ID=3, _DIGIT_RE=re.compile(br"\d")): +def sentence_to_token_ids(sentence, vocabulary, tokenizer=None, normalize_digits=True, UNK_ID=3, _DIGIT_RE=re.compile(br"\d")): """Convert a string to list of integers representing token-ids. For example, a sentence "I have a dog" may become tokenized into @@ -901,20 +934,21 @@ def sentence_to_token_ids(sentence, vocabulary, Parameters ----------- - sentence : tensorflow.python.platform.gfile.GFile Object - The sentence in bytes format to convert to token-ids.\n - see basic_tokenizer(), data_to_token_ids() - vocabulary : a dictionary mapping tokens to integers. - tokenizer : a function to use to tokenize each sentence; - If None, basic_tokenizer will be used. - normalize_digits : Boolean - If true, all digits are replaced by 0s. + sentence : tensorflow.python.platform.gfile.GFile Object + The sentence in bytes format to convert to token-ids, see ``basic_tokenizer()`` and ``data_to_token_ids()``. + vocabulary : dictionary + Mmapping tokens to integers. + tokenizer : function + A function to use to tokenize each sentence. If None, ``basic_tokenizer`` will be used. + normalize_digits : boolean + If true, all digits are replaced by 0. Returns -------- - A list of integers, the token-ids for the sentence. - """ + list of int + The token-ids for the sentence. + """ if tokenizer: words = tokenizer(sentence) else: @@ -925,9 +959,7 @@ def sentence_to_token_ids(sentence, vocabulary, return [vocabulary.get(re.sub(_DIGIT_RE, b"0", w), UNK_ID) for w in words] -def data_to_token_ids(data_path, target_path, vocabulary_path, - tokenizer=None, normalize_digits=True, - UNK_ID=3, _DIGIT_RE=re.compile(br"\d")): +def data_to_token_ids(data_path, target_path, vocabulary_path, tokenizer=None, normalize_digits=True, UNK_ID=3, _DIGIT_RE=re.compile(br"\d")): """Tokenize data file and turn into token-ids using given vocabulary file. This function loads data line-by-line from data_path, calls the above @@ -936,19 +968,24 @@ def data_to_token_ids(data_path, target_path, vocabulary_path, Parameters ----------- - data_path : path to the data file in one-sentence-per-line format. 
- target_path : path where the file with token-ids will be created. - vocabulary_path : path to the vocabulary file. - tokenizer : a function to use to tokenize each sentence; - if None, basic_tokenizer will be used. - normalize_digits : Boolean; if true, all digits are replaced by 0s. + data_path : str + Path to the data file in one-sentence-per-line format. + target_path : str + Path where the file with token-ids will be created. + vocabulary_path : str + Path to the vocabulary file. + tokenizer : function + A function to use to tokenize each sentence. If None, ``basic_tokenizer`` will be used. + normalize_digits : boolean + If true, all digits are replaced by 0. References ---------- - Code from ``/tensorflow/models/rnn/translation/data_utils.py`` + """ if not gfile.Exists(target_path): - print("Tokenizing data in %s" % data_path) + logging.info("Tokenizing data in %s" % data_path) vocab, _ = initialize_vocabulary(vocabulary_path) with gfile.GFile(data_path, mode="rb") as data_file: with gfile.GFile(target_path, mode="w") as tokens_file: @@ -956,30 +993,25 @@ def data_to_token_ids(data_path, target_path, vocabulary_path, for line in data_file: counter += 1 if counter % 100000 == 0: - print(" tokenizing line %d" % counter) - token_ids = sentence_to_token_ids(line, vocab, tokenizer, - normalize_digits, UNK_ID=UNK_ID, - _DIGIT_RE=_DIGIT_RE) + logging.info(" tokenizing line %d" % counter) + token_ids = sentence_to_token_ids(line, vocab, tokenizer, normalize_digits, UNK_ID=UNK_ID, _DIGIT_RE=_DIGIT_RE) tokens_file.write(" ".join([str(tok) for tok in token_ids]) + "\n") else: - print("Target path %s exists" % target_path) - - -# Metric -import subprocess -import tempfile -from six.moves import urllib + logging.info("Target path %s exists" % target_path) -def moses_multi_bleu(hypotheses, references, lowercase=False): # tl.nlp +def moses_multi_bleu(hypotheses, references, lowercase=False): """Calculate the bleu score for hypotheses and references using the MOSES ulti-bleu.perl script. Parameters ------------ - hypotheses : A numpy array of strings where each string is a single example. - references : A numpy array of strings where each string is a single example. - lowercase : If true, pass the "-lc" flag to the multi-bleu script + hypotheses : numpy.array.string + A numpy array of strings where each string is a single example. + references : numpy.array.string + A numpy array of strings where each string is a single example. + lowercase : boolean + If True, pass the "-lc" flag to the multi-bleu script Examples --------- @@ -989,21 +1021,20 @@ def moses_multi_bleu(hypotheses, references, lowercase=False): # tl.nlp Returns -------- - The BLEU score as a float32 value. 
+ float + The BLEU score References ---------- - - `Google/seq2seq/metric/bleu `_ - """ + - `Google/seq2seq/metric/bleu `__ + """ if np.size(hypotheses) == 0: return np.float32(0.0) # Get MOSES multi-bleu script try: - multi_bleu_path, _ = urllib.request.urlretrieve( - "https://raw.githubusercontent.com/moses-smt/mosesdecoder/" - "master/scripts/generic/multi-bleu.perl") + multi_bleu_path, _ = urllib.request.urlretrieve("https://raw.githubusercontent.com/moses-smt/mosesdecoder/" "master/scripts/generic/multi-bleu.perl") os.chmod(multi_bleu_path, 0o755) except: # pylint: disable=W0702 tf.logging.info("Unable to fetch multi-bleu.perl script, using local.") @@ -1028,8 +1059,7 @@ def moses_multi_bleu(hypotheses, references, lowercase=False): # tl.nlp bleu_cmd += ["-lc"] bleu_cmd += [reference_file.name] try: - bleu_out = subprocess.check_output( - bleu_cmd, stdin=read_pred, stderr=subprocess.STDOUT) + bleu_out = subprocess.check_output(bleu_cmd, stdin=read_pred, stderr=subprocess.STDOUT) bleu_out = bleu_out.decode("utf-8") bleu_score = re.search(r"BLEU = (.+?),", bleu_out).group(1) bleu_score = float(bleu_score) diff --git a/tensorlayer/prepro.py b/tensorlayer/prepro.py index 11272a1e..35d786d8 100644 --- a/tensorlayer/prepro.py +++ b/tensorlayer/prepro.py @@ -1,19 +1,20 @@ -#! /usr/bin/python # -*- coding: utf-8 -*- +import sys +import threading +import time -import tensorflow as tf -import tensorlayer as tl import numpy as np +import scipy +import scipy.ndimage as ndi +import skimage +import tensorlayer as tl +from scipy import linalg +from scipy.ndimage.filters import gaussian_filter +from scipy.ndimage.interpolation import map_coordinates +from six.moves import range +from skimage import exposure, transform -import time -import numbers -import random -import os -import re -import sys - -import threading # import Queue # <-- donot work for py3 is_py2 = sys.version[0] == '2' if is_py2: @@ -21,279 +22,300 @@ else: import queue as queue -from six.moves import range -import scipy -from scipy import linalg -import scipy.ndimage as ndi - -from skimage import transform -from skimage import exposure -import skimage - -from multiprocessing import Pool - # linalg https://docs.scipy.org/doc/scipy/reference/linalg.html # ndimage https://docs.scipy.org/doc/scipy/reference/ndimage.html -## Threading + def threading_data(data=None, fn=None, thread_count=None, **kwargs): - """Return a batch of result by given data. + """Process a batch of data by given function by threading. + Usually be used for data augmentation. Parameters ----------- - data : numpy array, file names and etc, see Examples below. - thread_count : the number of threads to use - fn : the function for data processing. - more args : the args for fn, see Examples below. + data : numpy.array or others + The data to be processed. + thread_count : int + The number of threads to use. + fn : function + The function for data processing. + more args : the args for `fn` + Ssee Examples below. Examples -------- - - Single array - >>> X --> [batch_size, row, col, 1] greyscale - >>> results = threading_data(X, zoom, zoom_range=[0.5, 1], is_random=True) - ... results --> [batch_size, row, col, channel] - >>> tl.visualize.images2d(images=np.asarray(results), second=0.01, saveable=True, name='after', dtype=None) - >>> tl.visualize.images2d(images=np.asarray(X), second=0.01, saveable=True, name='before', dtype=None) - - - List of array (e.g. 
functions with ``multi``) - >>> X, Y --> [batch_size, row, col, 1] greyscale - >>> data = threading_data([_ for _ in zip(X, Y)], zoom_multi, zoom_range=[0.5, 1], is_random=True) + Process images. + + >>> images, _, _, _ = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3)) + >>> images = tl.prepro.threading_data(images[0:32], tl.prepro.zoom, zoom_range=[0.5, 1]) + + Customized image preprocessing function. + + >>> def distort_img(x): + ... x = tl.prepro.flip_axis(x, axis=0, is_random=True) + ... x = tl.prepro.flip_axis(x, axis=1, is_random=True) + ... x = tl.prepro.crop(x, 100, 100, is_random=True) + ... return x + >>> images = tl.prepro.threading_data(images, distort_img) + + Process images and masks together (Usually be used for image segmentation). + + >>> X, Y --> [batch_size, row, col, 1] + >>> data = tl.prepro.threading_data([_ for _ in zip(X, Y)], tl.prepro.zoom_multi, zoom_range=[0.5, 1], is_random=True) ... data --> [batch_size, 2, row, col, 1] >>> X_, Y_ = data.transpose((1,0,2,3,4)) ... X_, Y_ --> [batch_size, row, col, 1] - >>> tl.visualize.images2d(images=np.asarray(X_), second=0.01, saveable=True, name='after', dtype=None) - >>> tl.visualize.images2d(images=np.asarray(Y_), second=0.01, saveable=True, name='before', dtype=None) + >>> tl.vis.save_image(X_, 'images.png') + >>> tl.vis.save_image(Y_, 'masks.png') + + Process images and masks together by using ``thread_count``. - - Single array split across ``thread_count`` threads (e.g. functions with ``multi``) - >>> X, Y --> [batch_size, row, col, 1] greyscale - >>> data = threading_data(X, zoom_multi, 8, zoom_range=[0.5, 1], is_random=True) + >>> X, Y --> [batch_size, row, col, 1] + >>> data = tl.prepro.threading_data(X, tl.prepro.zoom_multi, 8, zoom_range=[0.5, 1], is_random=True) ... data --> [batch_size, 2, row, col, 1] >>> X_, Y_ = data.transpose((1,0,2,3,4)) ... X_, Y_ --> [batch_size, row, col, 1] - >>> tl.visualize.images2d(images=np.asarray(X_), second=0.01, saveable=True, name='after', dtype=None) - >>> tl.visualize.images2d(images=np.asarray(Y_), second=0.01, saveable=True, name='before', dtype=None) + >>> tl.vis.save_image(X_, 'after.png') + >>> tl.vis.save_image(Y_, 'before.png') + + Customized function for processing images and masks together. - - Customized function for image segmentation >>> def distort_img(data): ... x, y = data - ... x, y = flip_axis_multi([x, y], axis=0, is_random=True) - ... x, y = flip_axis_multi([x, y], axis=1, is_random=True) - ... x, y = crop_multi([x, y], 100, 100, is_random=True) + ... x, y = tl.prepro.flip_axis_multi([x, y], axis=0, is_random=True) + ... x, y = tl.prepro.flip_axis_multi([x, y], axis=1, is_random=True) + ... x, y = tl.prepro.crop_multi([x, y], 100, 100, is_random=True) ... return x, y >>> X, Y --> [batch_size, row, col, channel] - >>> data = threading_data([_ for _ in zip(X, Y)], distort_img) + >>> data = tl.prepro.threading_data([_ for _ in zip(X, Y)], distort_img) >>> X_, Y_ = data.transpose((1,0,2,3,4)) + Returns + ------- + list or numpyarray + The processed results. + References ---------- - - `python queue `_ - - `run with limited queue `_ - """ - ## plot function info - # for name, value in kwargs.items(): - # print('{0} = {1}'.format(name, value)) - # exit() - # define function for threading + - `python queue `__ + - `run with limited queue `__ + + """ + def apply_fn(results, i, data, kwargs): results[i] = fn(data, **kwargs) - ## start multi-threaded reading. 
- if thread_count is None: # by Milo - results = [None] * len(data) ## preallocate result list + if thread_count is None: + results = [None] * len(data) threads = [] for i in range(len(data)): - t = threading.Thread( - name='threading_and_return', - target=apply_fn, - args=(results, i, data[i], kwargs) - ) + t = threading.Thread(name='threading_and_return', target=apply_fn, args=(results, i, data[i], kwargs)) t.start() threads.append(t) - else: # by geometrikal + else: divs = np.linspace(0, len(data), thread_count + 1) divs = np.round(divs).astype(int) results = [None] * thread_count threads = [] for i in range(thread_count): - t = threading.Thread( - name='threading_and_return', - target=apply_fn, - args=(results, i, data[divs[i]:divs[i + 1]], kwargs) - ) + t = threading.Thread(name='threading_and_return', target=apply_fn, args=(results, i, data[divs[i]:divs[i + 1]], kwargs)) t.start() threads.append(t) - ## wait for all threads to complete for t in threads: t.join() if thread_count is None: try: return np.asarray(results) - except: # if dim don't match + except: return results else: return np.concatenate(results) -## Image -def rotation(x, rg=20, is_random=False, row_index=0, col_index=1, channel_index=2, - fill_mode='nearest', cval=0., order=1): +def rotation(x, rg=20, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): """Rotate an image randomly or non-randomly. Parameters ----------- - x : numpy array + x : numpy.array An image with dimension of [row, col, channel] (default). rg : int or float Degree to rotate, usually 0 ~ 180. - is_random : boolean, default False - If True, randomly rotate. - row_index, col_index, channel_index : int + is_random : boolean + If True, randomly rotate. Default is False + row_index col_index and channel_index : int Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). - fill_mode : string - Method to fill missing pixel, default ‘nearest’, more options ‘constant’, ‘reflect’ or ‘wrap’ + fill_mode : str + Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`, see `scipy ndimage affine_transform `__ + cval : float + Value used for points outside the boundaries of the input if mode=`constant`. Default is 0.0 + order : int + The order of interpolation. The order has to be in the range 0-5. See ``tl.prepro.apply_transform`` and `scipy ndimage affine_transform `__ - - `scipy ndimage affine_transform `_ - cval : scalar, optional - Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0 - order : int, optional - The order of interpolation. The order has to be in the range 0-5. See ``apply_transform``. - - - `scipy ndimage affine_transform `_ + Returns + ------- + numpy.array + A processed image. 
Examples --------- - >>> x --> [row, col, 1] greyscale - >>> x = rotation(x, rg=40, is_random=False) - >>> tl.visualize.frame(x[:,:,0], second=0.01, saveable=True, name='temp',cmap='gray') + >>> x --> [row, col, 1] + >>> x = tl.prepro.rotation(x, rg=40, is_random=False) + >>> tl.vis.save_image(x, 'im.png') + """ if is_random: theta = np.pi / 180 * np.random.uniform(-rg, rg) else: - theta = np.pi /180 * rg - rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0], - [np.sin(theta), np.cos(theta), 0], - [0, 0, 1]]) + theta = np.pi / 180 * rg + rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0], [np.sin(theta), np.cos(theta), 0], [0, 0, 1]]) h, w = x.shape[row_index], x.shape[col_index] transform_matrix = transform_matrix_offset_center(rotation_matrix, h, w) x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval, order) return x -def rotation_multi(x, rg=20, is_random=False, row_index=0, col_index=1, channel_index=2, - fill_mode='nearest', cval=0., order=1): + +def rotation_multi(x, rg=20, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): """Rotate multiple images with the same arguments, randomly or non-randomly. Usually be used for image segmentation which x=[X, Y], X and Y should be matched. Parameters ----------- - x : list of numpy array + x : list of numpy.array List of images with dimension of [n_images, row, col, channel] (default). - others : see ``rotation``. + others : args + See ``tl.prepro.rotation``. + + Returns + ------- + numpy.array + A list of processed images. Examples -------- >>> x, y --> [row, col, 1] greyscale - >>> x, y = rotation_multi([x, y], rg=90, is_random=False) - >>> tl.visualize.frame(x[:,:,0], second=0.01, saveable=True, name='x',cmap='gray') - >>> tl.visualize.frame(y[:,:,0], second=0.01, saveable=True, name='y',cmap='gray') + >>> x, y = tl.prepro.rotation_multi([x, y], rg=90, is_random=False) + """ if is_random: theta = np.pi / 180 * np.random.uniform(-rg, rg) else: - theta = np.pi /180 * rg - rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0], - [np.sin(theta), np.cos(theta), 0], - [0, 0, 1]]) + theta = np.pi / 180 * rg + rotation_matrix = np.array([[np.cos(theta), -np.sin(theta), 0], [np.sin(theta), np.cos(theta), 0], [0, 0, 1]]) h, w = x[0].shape[row_index], x[0].shape[col_index] transform_matrix = transform_matrix_offset_center(rotation_matrix, h, w) results = [] for data in x: - results.append( apply_transform(data, transform_matrix, channel_index, fill_mode, cval, order)) + results.append(apply_transform(data, transform_matrix, channel_index, fill_mode, cval, order)) return np.asarray(results) + # crop -def crop(x, wrg, hrg, is_random=False, row_index=0, col_index=1, channel_index=2): +def crop(x, wrg, hrg, is_random=False, row_index=0, col_index=1): """Randomly or centrally crop an image. Parameters ---------- - x : numpy array + x : numpy.array An image with dimension of [row, col, channel] (default). wrg : int Size of width. hrg : int Size of height. - is_random : boolean, default False - If True, randomly crop, else central crop. - row_index, col_index, channel_index : int - Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). + is_random : boolean, + If True, randomly crop, else central crop. Default is False. + row_index: int + index of row. + col_index: int + index of column. + + Returns + ------- + numpy.array + A processed image. 
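+
+    Examples
+    --------
+    A minimal sketch (the random array below simply stands in for an image):
+
+    >>> x = np.random.rand(100, 100, 3)
+    >>> x = tl.prepro.crop(x, wrg=64, hrg=64, is_random=True)
+    >>> print(x.shape)
+    ... (64, 64, 3)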
+
+    """
    h, w = x.shape[row_index], x.shape[col_index]
    assert (h > hrg) and (w > wrg), "The size of cropping should smaller than the original image"
 
    if is_random:
-        h_offset = int(np.random.uniform(0, h-hrg) -1)
-        w_offset = int(np.random.uniform(0, w-wrg) -1)
-        # print(h_offset, w_offset, x[h_offset: hrg+h_offset ,w_offset: wrg+w_offset].shape)
-        return x[h_offset: hrg+h_offset ,w_offset: wrg+w_offset]
-    else:   # central crop
-        h_offset = int(np.floor((h - hrg)/2.))
-        w_offset = int(np.floor((w - wrg)/2.))
+        h_offset = int(np.random.uniform(0, h - hrg) - 1)
+        w_offset = int(np.random.uniform(0, w - wrg) - 1)
+        # logging.info(h_offset, w_offset, x[h_offset: hrg+h_offset ,w_offset: wrg+w_offset].shape)
+        return x[h_offset:hrg + h_offset, w_offset:wrg + w_offset]
+    else:  # central crop
+        h_offset = int(np.floor((h - hrg) / 2.))
+        w_offset = int(np.floor((w - wrg) / 2.))
        h_end = h_offset + hrg
        w_end = w_offset + wrg
-        return x[h_offset: h_end, w_offset: w_end]
+        return x[h_offset:h_end, w_offset:w_end]
    # old implementation
    # h_offset = (h - hrg)/2
    # w_offset = (w - wrg)/2
-    # # print(x[h_offset: h-h_offset ,w_offset: w-w_offset].shape)
+    # # logging.info(x[h_offset: h-h_offset ,w_offset: w-w_offset].shape)
    # return x[h_offset: h-h_offset ,w_offset: w-w_offset]     # central crop
 
 
-def crop_multi(x, wrg, hrg, is_random=False, row_index=0, col_index=1, channel_index=2):
+def crop_multi(x, wrg, hrg, is_random=False, row_index=0, col_index=1):
    """Randomly or centrally crop multiple images.
 
    Parameters
    ----------
-    x : list of numpy array
+    x : list of numpy.array
        List of images with dimension of [n_images, row, col, channel] (default).
-    others : see ``crop``.
+    others : args
+        See ``tl.prepro.crop``.
+
+    Returns
+    -------
+    numpy.array
+        A list of processed images.
+
    """
    h, w = x[0].shape[row_index], x[0].shape[col_index]
    assert (h > hrg) and (w > wrg), "The size of cropping should smaller than the original image"
 
    if is_random:
-        h_offset = int(np.random.uniform(0, h-hrg) -1)
-        w_offset = int(np.random.uniform(0, w-wrg) -1)
+        h_offset = int(np.random.uniform(0, h - hrg) - 1)
+        w_offset = int(np.random.uniform(0, w - wrg) - 1)
        results = []
        for data in x:
-            results.append( data[h_offset: hrg+h_offset ,w_offset: wrg+w_offset])
+            results.append(data[h_offset:hrg + h_offset, w_offset:wrg + w_offset])
        return np.asarray(results)
    else:
        # central crop
-        h_offset = (h - hrg)/2
-        w_offset = (w - wrg)/2
+        h_offset = int((h - hrg) / 2)  # integer offsets are required for array slicing (Python 3 `/` is true division)
+        w_offset = int((w - wrg) / 2)
        results = []
        for data in x:
-            results.append( data[h_offset: h-h_offset ,w_offset: w-w_offset] )
+            results.append(data[h_offset:h - h_offset, w_offset:w - w_offset])
        return np.asarray(results)
 
 
 # flip
 def flip_axis(x, axis=1, is_random=False):
    """Flip the axis of an image, such as flip left and right, up and down, randomly or non-randomly,
 
    Parameters
    ----------
-    x : numpy array
+    x : numpy.array
        An image with dimension of [row, col, channel] (default).
    axis : int
-        - 0, flip up and down
-        - 1, flip left and right
-        - 2, flip channel
-    is_random : boolean, default False
-        If True, randomly flip.
+        Which axis to flip.
+            - 0, flip up and down
+            - 1, flip left and right
+            - 2, flip channel
+    is_random : boolean
+        If True, randomly flip. Default is False.
+
+    Returns
+    -------
+    numpy.array
+        A processed image.
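+
+    Examples
+    --------
+    A minimal sketch (the random array below simply stands in for an image):
+
+    >>> x = np.random.rand(100, 100, 3)
+    >>> x = tl.prepro.flip_axis(x, axis=1, is_random=False)  # flip left and right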
+ """ if is_random: factor = np.random.uniform(-1, 1) @@ -310,14 +332,22 @@ def flip_axis(x, axis=1, is_random=False): x = x.swapaxes(0, axis) return x + def flip_axis_multi(x, axis, is_random=False): """Flip the axises of multiple images together, such as flip left and right, up and down, randomly or non-randomly, Parameters ----------- - x : list of numpy array + x : list of numpy.array List of images with dimension of [n_images, row, col, channel] (default). - others : see ``flip_axis``. + others : args + See ``tl.prepro.flip_axis``. + + Returns + ------- + numpy.array + A list of processed images. + """ if is_random: factor = np.random.uniform(-1, 1) @@ -331,7 +361,7 @@ def flip_axis_multi(x, axis, is_random=False): data = np.asarray(data).swapaxes(axis, 0) data = data[::-1, ...] data = data.swapaxes(0, axis) - results.append( data ) + results.append(data) return np.asarray(results) else: return np.asarray(x) @@ -345,36 +375,38 @@ def flip_axis_multi(x, axis, is_random=False): data = np.asarray(data).swapaxes(axis, 0) data = data[::-1, ...] data = data.swapaxes(0, axis) - results.append( data ) + results.append(data) return np.asarray(results) + # shift -def shift(x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, - fill_mode='nearest', cval=0., order=1): +def shift(x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): """Shift an image randomly or non-randomly. Parameters ----------- - x : numpy array + x : numpy.array An image with dimension of [row, col, channel] (default). wrg : float Percentage of shift in axis x, usually -0.25 ~ 0.25. hrg : float Percentage of shift in axis y, usually -0.25 ~ 0.25. - is_random : boolean, default False - If True, randomly shift. - row_index, col_index, channel_index : int + is_random : boolean + If True, randomly shift. Default is False. + row_index col_index and channel_index : int Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). - fill_mode : string - Method to fill missing pixel, default ‘nearest’, more options ‘constant’, ‘reflect’ or ‘wrap’. - - - `scipy ndimage affine_transform `_ - cval : scalar, optional + fill_mode : str + Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`, see `scipy ndimage affine_transform `__ + cval : float Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0. - order : int, optional - The order of interpolation. The order has to be in the range 0-5. See ``apply_transform``. + order : int + The order of interpolation. The order has to be in the range 0-5. See ``tl.prepro.apply_transform`` and `scipy ndimage affine_transform `__ + + Returns + ------- + numpy.array + A processed image. 
- - `scipy ndimage affine_transform `_ """ h, w = x.shape[row_index], x.shape[col_index] if is_random: @@ -382,24 +414,29 @@ def shift(x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, channe ty = np.random.uniform(-wrg, wrg) * w else: tx, ty = hrg * h, wrg * w - translation_matrix = np.array([[1, 0, tx], - [0, 1, ty], - [0, 0, 1]]) + translation_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]]) transform_matrix = translation_matrix # no need to do offset x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval, order) return x -def shift_multi(x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, - fill_mode='nearest', cval=0., order=1): + +def shift_multi(x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): """Shift images with the same arguments, randomly or non-randomly. Usually be used for image segmentation which x=[X, Y], X and Y should be matched. Parameters ----------- - x : list of numpy array + x : list of numpy.array List of images with dimension of [n_images, row, col, channel] (default). - others : see ``shift``. + others : args + See ``tl.prepro.shift``. + + Returns + ------- + numpy.array + A list of processed images. + """ h, w = x[0].shape[row_index], x[0].shape[col_index] if is_random: @@ -407,196 +444,223 @@ def shift_multi(x, wrg=0.1, hrg=0.1, is_random=False, row_index=0, col_index=1, ty = np.random.uniform(-wrg, wrg) * w else: tx, ty = hrg * h, wrg * w - translation_matrix = np.array([[1, 0, tx], - [0, 1, ty], - [0, 0, 1]]) + translation_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]]) transform_matrix = translation_matrix # no need to do offset results = [] for data in x: - results.append( apply_transform(data, transform_matrix, channel_index, fill_mode, cval, order)) + results.append(apply_transform(data, transform_matrix, channel_index, fill_mode, cval, order)) return np.asarray(results) + # shear -def shear(x, intensity=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, - fill_mode='nearest', cval=0., order=1): +def shear(x, intensity=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): """Shear an image randomly or non-randomly. Parameters ----------- - x : numpy array + x : numpy.array An image with dimension of [row, col, channel] (default). intensity : float Percentage of shear, usually -0.5 ~ 0.5 (is_random==True), 0 ~ 0.5 (is_random==False), you can have a quick try by shear(X, 1). - is_random : boolean, default False - If True, randomly shear. - row_index, col_index, channel_index : int + is_random : boolean + If True, randomly shear. Default is False. + row_index col_index and channel_index : int Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). - fill_mode : string - Method to fill missing pixel, default ‘nearest’, more options ‘constant’, ‘reflect’ or ‘wrap’. - - - `scipy ndimage affine_transform `_ - cval : scalar, optional + fill_mode : str + Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`, see and `scipy ndimage affine_transform `__ + cval : float Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0. - order : int, optional - The order of interpolation. The order has to be in the range 0-5. See ``apply_transform``. + order : int + The order of interpolation. The order has to be in the range 0-5. 
See ``tl.prepro.apply_transform`` and `scipy ndimage affine_transform `__ - - `scipy ndimage affine_transform `_ + Returns + ------- + numpy.array + A processed image. References ----------- - - `Affine transformation `_ + - `Affine transformation `__ + """ if is_random: shear = np.random.uniform(-intensity, intensity) else: shear = intensity - shear_matrix = np.array([[1, -np.sin(shear), 0], - [0, np.cos(shear), 0], - [0, 0, 1]]) + shear_matrix = np.array([[1, -np.sin(shear), 0], [0, np.cos(shear), 0], [0, 0, 1]]) h, w = x.shape[row_index], x.shape[col_index] transform_matrix = transform_matrix_offset_center(shear_matrix, h, w) x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval, order) return x -def shear_multi(x, intensity=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, - fill_mode='nearest', cval=0., order=1): + +def shear_multi(x, intensity=0.1, is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): """Shear images with the same arguments, randomly or non-randomly. Usually be used for image segmentation which x=[X, Y], X and Y should be matched. Parameters ----------- - x : list of numpy array + x : list of numpy.array List of images with dimension of [n_images, row, col, channel] (default). - others : see ``tl.prepro.shear``. + others : args + See ``tl.prepro.shear``. + + Returns + ------- + numpy.array + A list of processed images. + """ if is_random: shear = np.random.uniform(-intensity, intensity) else: shear = intensity - shear_matrix = np.array([[1, -np.sin(shear), 0], - [0, np.cos(shear), 0], - [0, 0, 1]]) + shear_matrix = np.array([[1, -np.sin(shear), 0], [0, np.cos(shear), 0], [0, 0, 1]]) h, w = x[0].shape[row_index], x[0].shape[col_index] transform_matrix = transform_matrix_offset_center(shear_matrix, h, w) results = [] for data in x: - results.append( apply_transform(data, transform_matrix, channel_index, fill_mode, cval, order)) + results.append(apply_transform(data, transform_matrix, channel_index, fill_mode, cval, order)) return np.asarray(results) -def shear2(x, shear=(0.1, 0.1), is_random=False, row_index=0, col_index=1, channel_index=2, - fill_mode='nearest', cval=0., order=1): + +def shear2(x, shear=(0.1, 0.1), is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): """Shear an image randomly or non-randomly. Parameters ----------- - x : numpy array + x : numpy.array An image with dimension of [row, col, channel] (default). shear : tuple of two floats Percentage of shear for height and width direction (0, 1). - is_random : boolean, default False - If True, randomly shear. - row_index, col_index, channel_index : int + is_random : boolean + If True, randomly shear. Default is False. + row_index col_index and channel_index : int Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). - fill_mode : string - Method to fill missing pixel, default ‘nearest’, more options ‘constant’, ‘reflect’ or ‘wrap’. - - - `scipy ndimage affine_transform `_ - cval : scalar, optional + fill_mode : str + Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`, see `scipy ndimage affine_transform `__ + cval : float Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0. - order : int, optional - The order of interpolation. The order has to be in the range 0-5. See ``apply_transform``. + order : int + The order of interpolation. The order has to be in the range 0-5. 
See ``tl.prepro.apply_transform`` and `scipy ndimage affine_transform `__ - - `scipy ndimage affine_transform `_ + Returns + ------- + numpy.array + A processed image. References ----------- - - `Affine transformation `_ + - `Affine transformation `__ + """ assert len(shear) == 2, "shear should be tuple of 2 floats, or you want to use tl.prepro.shear rather than tl.prepro.shear2 ?" if is_random: shear[0] = np.random.uniform(-shear[0], shear[0]) shear[1] = np.random.uniform(-shear[1], shear[1]) - shear_matrix = np.array([[1, shear[0], 0], - [shear[1], 1, 0], - [0, 0, 1]]) + shear_matrix = np.array([[1, shear[0], 0], [shear[1], 1, 0], [0, 0, 1]]) h, w = x.shape[row_index], x.shape[col_index] transform_matrix = transform_matrix_offset_center(shear_matrix, h, w) x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval, order) return x -def shear_multi2(x, shear=(0.1, 0.1), is_random=False, row_index=0, col_index=1, channel_index=2, - fill_mode='nearest', cval=0., order=1): + +def shear_multi2(x, shear=(0.1, 0.1), is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): """Shear images with the same arguments, randomly or non-randomly. Usually be used for image segmentation which x=[X, Y], X and Y should be matched. Parameters ----------- - x : list of numpy array + x : list of numpy.array List of images with dimension of [n_images, row, col, channel] (default). - others : see ``tl.prepro.shear2``. + others : args + See ``tl.prepro.shear2``. + + Returns + ------- + numpy.array + A list of processed images. + """ assert len(shear) == 2, "shear should be tuple of 2 floats, or you want to use tl.prepro.shear_multi rather than tl.prepro.shear_multi2 ?" if is_random: shear[0] = np.random.uniform(-shear[0], shear[0]) shear[1] = np.random.uniform(-shear[1], shear[1]) - shear_matrix = np.array([[1, shear[0], 0], - [shear[1], 1, 0], - [0, 0, 1]]) + shear_matrix = np.array([[1, shear[0], 0], [shear[1], 1, 0], [0, 0, 1]]) h, w = x[0].shape[row_index], x[0].shape[col_index] transform_matrix = transform_matrix_offset_center(shear_matrix, h, w) results = [] for data in x: - results.append( apply_transform(data, transform_matrix, channel_index, fill_mode, cval, order)) + results.append(apply_transform(data, transform_matrix, channel_index, fill_mode, cval, order)) return np.asarray(results) + # swirl -def swirl(x, center=None, strength=1, radius=100, rotation=0, output_shape=None, order=1, mode='constant', cval=0, clip=True, preserve_range=False, is_random=False): - """Swirl an image randomly or non-randomly, see `scikit-image swirl API `_ - and `example `_. +def swirl(x, + center=None, + strength=1, + radius=100, + rotation=0, + output_shape=None, + order=1, + mode='constant', + cval=0, + clip=True, + preserve_range=False, + is_random=False): + """Swirl an image randomly or non-randomly, see `scikit-image swirl API `__ + and `example `__. Parameters ----------- - x : numpy array + x : numpy.array An image with dimension of [row, col, channel] (default). - center : (row, column) tuple or (2,) ndarray, optional - Center coordinate of transformation. - strength : float, optional + center : tuple or 2 int or None + Center coordinate of transformation (optional). + strength : float The amount of swirling applied. - radius : float, optional + radius : float The extent of the swirl in pixels. The effect dies out rapidly beyond radius. 
- rotation : float, (degree) optional + rotation : float Additional rotation applied to the image, usually [0, 360], relates to center. - output_shape : tuple (rows, cols), optional - Shape of the output image generated. By default the shape of the input image is preserved. + output_shape : tuple of 2 int or None + Shape of the output image generated (height, width). By default the shape of the input image is preserved. order : int, optional The order of the spline interpolation, default is 1. The order has to be in the range 0-5. See skimage.transform.warp for detail. - mode : {‘constant’, ‘edge’, ‘symmetric’, ‘reflect’, ‘wrap’}, optional - Points outside the boundaries of the input are filled according to the given mode, with ‘constant’ used as the default. Modes match the behaviour of numpy.pad. - cval : float, optional - Used in conjunction with mode ‘constant’, the value outside the image boundaries. - clip : bool, optional + mode : str + One of `constant` (default), `edge`, `symmetric` `reflect` and `wrap`. + Points outside the boundaries of the input are filled according to the given mode, with `constant` used as the default. Modes match the behaviour of numpy.pad. + cval : float + Used in conjunction with mode `constant`, the value outside the image boundaries. + clip : boolean Whether to clip the output to the range of values of the input image. This is enabled by default, since higher order interpolation may produce values outside the given input range. - preserve_range : bool, optional + preserve_range : boolean Whether to keep the original range of values. Otherwise, the input image is converted according to the conventions of img_as_float. - is_random : boolean, default False - If True, random swirl. + is_random : boolean, + If True, random swirl. Default is False. - random center = [(0 ~ x.shape[0]), (0 ~ x.shape[1])] - random strength = [0, strength] - random radius = [1e-10, radius] - random rotation = [-rotation, rotation] + Returns + ------- + numpy.array + A processed image. + Examples --------- >>> x --> [row, col, 1] greyscale - >>> x = swirl(x, strength=4, radius=100) + >>> x = tl.prepro.swirl(x, strength=4, radius=100) + """ assert radius != 0, Exception("Invalid radius value") rotation = np.pi / 180 * rotation @@ -609,23 +673,52 @@ def swirl(x, center=None, strength=1, radius=100, rotation=0, output_shape=None, rotation = np.random.uniform(-rotation, rotation) max_v = np.max(x) - if max_v > 1: # Note: the input of this fn should be [-1, 1], rescale is required. + if max_v > 1: # Note: the input of this fn should be [-1, 1], rescale is required. 
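+        # scale pixel values down to <= 1 before swirling; the original
+        # value range is restored below by multiplying max_v back in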
x = x / max_v - swirled = skimage.transform.swirl(x, center=center, strength=strength, radius=radius, rotation=rotation, - output_shape=output_shape, order=order, mode=mode, cval=cval, clip=clip, preserve_range=preserve_range) + swirled = skimage.transform.swirl( + x, + center=center, + strength=strength, + radius=radius, + rotation=rotation, + output_shape=output_shape, + order=order, + mode=mode, + cval=cval, + clip=clip, + preserve_range=preserve_range) if max_v > 1: swirled = swirled * max_v return swirled -def swirl_multi(x, center=None, strength=1, radius=100, rotation=0, output_shape=None, order=1, mode='constant', cval=0, clip=True, preserve_range=False, is_random=False): + +def swirl_multi(x, + center=None, + strength=1, + radius=100, + rotation=0, + output_shape=None, + order=1, + mode='constant', + cval=0, + clip=True, + preserve_range=False, + is_random=False): """Swirl multiple images with the same arguments, randomly or non-randomly. Usually be used for image segmentation which x=[X, Y], X and Y should be matched. Parameters ----------- - x : list of numpy array + x : list of numpy.array List of images with dimension of [n_images, row, col, channel] (default). - others : see ``swirl``. + others : args + See ``tl.prepro.swirl``. + + Returns + ------- + numpy.array + A list of processed images. + """ assert radius != 0, Exception("Invalid radius value") rotation = np.pi / 180 * rotation @@ -640,40 +733,61 @@ def swirl_multi(x, center=None, strength=1, radius=100, rotation=0, output_shape results = [] for data in x: max_v = np.max(data) - if max_v > 1: # Note: the input of this fn should be [-1, 1], rescale is required. + if max_v > 1: # Note: the input of this fn should be [-1, 1], rescale is required. data = data / max_v - swirled = skimage.transform.swirl(data, center=center, strength=strength, radius=radius, rotation=rotation, - output_shape=output_shape, order=order, mode=mode, cval=cval, clip=clip, preserve_range=preserve_range) + swirled = skimage.transform.swirl( + data, + center=center, + strength=strength, + radius=radius, + rotation=rotation, + output_shape=output_shape, + order=order, + mode=mode, + cval=cval, + clip=clip, + preserve_range=preserve_range) if max_v > 1: swirled = swirled * max_v - results.append( swirled ) + results.append(swirled) return np.asarray(results) + # elastic_transform -from scipy.ndimage.interpolation import map_coordinates -from scipy.ndimage.filters import gaussian_filter + def elastic_transform(x, alpha, sigma, mode="constant", cval=0, is_random=False): - """Elastic deformation of images as described in `[Simard2003] `_ . + """Elastic transformation for image as described in `[Simard2003] `__. Parameters ----------- - x : numpy array, a greyscale image. - alpha : scalar factor. - sigma : scalar or sequence of scalars, the smaller the sigma, the more transformation. - Standard deviation for Gaussian kernel. The standard deviations of the Gaussian filter are given for each axis as a sequence, or as a single number, in which case it is equal for all axes. - mode : default constant, see `scipy.ndimage.filters.gaussian_filter `_. - cval : float, optional. Used in conjunction with mode ‘constant’, the value outside the image boundaries. - is_random : boolean, default False + x : numpy.array + A greyscale image. + alpha : float + Alpha value for elastic transformation. + sigma : float or sequence of float + The smaller the sigma, the more transformation. Standard deviation for Gaussian kernel. 
The standard deviations of the Gaussian filter are given for each axis as a sequence, or as a single number, in which case it is equal for all axes. + mode : str + See `scipy.ndimage.filters.gaussian_filter `__. Default is `constant`. + cval : float, + Used in conjunction with `mode` of `constant`, the value outside the image boundaries. + is_random : boolean + Default is False. + + Returns + ------- + numpy.array + A processed image. Examples --------- - >>> x = elastic_transform(x, alpha = x.shape[1] * 3, sigma = x.shape[1] * 0.07) + >>> x = tl.prepro.elastic_transform(x, alpha=x.shape[1]*3, sigma=x.shape[1]*0.07) References ------------ - - `Github `_. - - `Kaggle `_ + - `Github `__. + - `Kaggle `__ + """ if is_random is False: random_state = np.random.RandomState(None) @@ -682,11 +796,11 @@ def elastic_transform(x, alpha, sigma, mode="constant", cval=0, is_random=False) # is_3d = False if len(x.shape) == 3 and x.shape[-1] == 1: - x = x[:,:,0] + x = x[:, :, 0] is_3d = True elif len(x.shape) == 3 and x.shape[-1] != 1: raise Exception("Only support greyscale image") - assert len(x.shape)==2 + assert len(x.shape) == 2 shape = x.shape @@ -700,13 +814,22 @@ def elastic_transform(x, alpha, sigma, mode="constant", cval=0, is_random=False) else: return map_coordinates(x, indices, order=1).reshape(shape) + def elastic_transform_multi(x, alpha, sigma, mode="constant", cval=0, is_random=False): - """Elastic deformation of images as described in `[Simard2003] `_. + """Elastic transformation for images as described in `[Simard2003] `__. Parameters ----------- - x : list of numpy array - others : see ``elastic_transform``. + x : list of numpy.array + List of greyscale images. + others : args + See ``tl.prepro.elastic_transform``. + + Returns + ------- + numpy.array + A list of processed images. + """ if is_random is False: random_state = np.random.RandomState(None) @@ -722,100 +845,103 @@ def elastic_transform_multi(x, alpha, sigma, mode="constant", cval=0, is_random= for data in x: is_3d = False if len(data.shape) == 3 and data.shape[-1] == 1: - data = data[:,:,0] + data = data[:, :, 0] is_3d = True elif len(data.shape) == 3 and data.shape[-1] != 1: raise Exception("Only support greyscale image") - assert len(data.shape)==2 + assert len(data.shape) == 2 dx = gaussian_filter((new_shape * 2 - 1), sigma, mode=mode, cval=cval) * alpha dy = gaussian_filter((new_shape * 2 - 1), sigma, mode=mode, cval=cval) * alpha x_, y_ = np.meshgrid(np.arange(shape[0]), np.arange(shape[1]), indexing='ij') indices = np.reshape(x_ + dx, (-1, 1)), np.reshape(y_ + dy, (-1, 1)) - # print(data.shape) + # logging.info(data.shape) if is_3d: - results.append( map_coordinates(data, indices, order=1).reshape((shape[0], shape[1], 1))) + results.append(map_coordinates(data, indices, order=1).reshape((shape[0], shape[1], 1))) else: - results.append( map_coordinates(data, indices, order=1).reshape(shape) ) + results.append(map_coordinates(data, indices, order=1).reshape(shape)) return np.asarray(results) + # zoom -def zoom(x, zoom_range=(0.9, 1.1), is_random=False, row_index=0, col_index=1, channel_index=2, - fill_mode='nearest', cval=0., order=1): +def zoom(x, zoom_range=(0.9, 1.1), is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1): """Zoom in and out of a single image, randomly or non-randomly. Parameters ----------- - x : numpy array + x : numpy.array An image with dimension of [row, col, channel] (default). 
    zoom_range : list or tuple
-        - If is_random=False, (h, w) are the fixed zoom factor for row and column axies, factor small than one is zoom in.
-        - If is_random=True, it is (min zoom out, max zoom out) for x and y with different random zoom in/out factor.
-        e.g (0.5, 1) zoom in 1~2 times.
-    is_random : boolean, default False
-        If True, randomly zoom.
-    row_index, col_index, channel_index : int
+        Zoom range for height and width.
+        - If is_random=False, (h, w) are the fixed zoom factors for row and column axes; a factor smaller than one zooms in.
+        - If is_random=True, (h, w) are (min zoom out, max zoom out) for x and y with different random zoom in/out factors, e.g. (0.5, 1) zooms in 1~2 times.
+    is_random : boolean
+        If True, randomly zoom. Default is False.
+    row_index, col_index and channel_index : int
        Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0).
-    fill_mode : string
-        Method to fill missing pixel, default ‘nearest’, more options ‘constant’, ‘reflect’ or ‘wrap’.
-
-        - `scipy ndimage affine_transform `_
-    cval : scalar, optional
+    fill_mode : str
+        Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`, see `scipy ndimage affine_transform `__
+    cval : float
        Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0.
-    order : int, optional
-        The order of interpolation. The order has to be in the range 0-5. See ``apply_transform``.
+    order : int
+        The order of interpolation. The order has to be in the range 0-5. See ``tl.prepro.apply_transform`` and `scipy ndimage affine_transform `__
+
+    Returns
+    -------
+    numpy.array
+        A processed image.

-        - `scipy ndimage affine_transform `_
    """
    if len(zoom_range) != 2:
-        raise Exception('zoom_range should be a tuple or list of two floats. '
-                        'Received arg: ', zoom_range)
+        raise Exception('zoom_range should be a tuple or list of two floats. ' 'Received arg: ', zoom_range)
    if is_random:
        if zoom_range[0] == 1 and zoom_range[1] == 1:
            zx, zy = 1, 1
-            print(" random_zoom : not zoom in/out")
+            logging.info(" random_zoom : not zoom in/out")
        else:
            zx, zy = np.random.uniform(zoom_range[0], zoom_range[1], 2)
    else:
        zx, zy = zoom_range
-    # print(zx, zy)
-    zoom_matrix = np.array([[zx, 0, 0],
-                            [0, zy, 0],
-                            [0, 0, 1]])
+    # logging.info(zx, zy)
+    zoom_matrix = np.array([[zx, 0, 0], [0, zy, 0], [0, 0, 1]])

    h, w = x.shape[row_index], x.shape[col_index]
    transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w)
    x = apply_transform(x, transform_matrix, channel_index, fill_mode, cval, order)
    return x

-def zoom_multi(x, zoom_range=(0.9, 1.1), is_random=False,
-        row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1):
+
+def zoom_multi(x, zoom_range=(0.9, 1.1), is_random=False, row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1):
    """Zoom in and out of images with the same arguments, randomly or non-randomly.
    Usually be used for image segmentation which x=[X, Y], X and Y should be matched.

    Parameters
    -----------
-    x : list of numpy array
+    x : list of numpy.array
        List of images with dimension of [n_images, row, col, channel] (default).
-    others : see ``zoom``.
+    others : args
+        See ``tl.prepro.zoom``.
+
+    Returns
+    -------
+    numpy.array
+        A list of processed images.
+
    """
    if len(zoom_range) != 2:
-        raise Exception('zoom_range should be a tuple or list of two floats. '
-                        'Received arg: ', zoom_range)
+        raise Exception('zoom_range should be a tuple or list of two floats. 
' 'Received arg: ', zoom_range) if is_random: if zoom_range[0] == 1 and zoom_range[1] == 1: zx, zy = 1, 1 - print(" random_zoom : not zoom in/out") + logging.info(" random_zoom : not zoom in/out") else: zx, zy = np.random.uniform(zoom_range[0], zoom_range[1], 2) else: zx, zy = zoom_range - zoom_matrix = np.array([[zx, 0, 0], - [0, zy, 0], - [0, 0, 1]]) + zoom_matrix = np.array([[zx, 0, 0], [0, zy, 0], [0, 0, 1]]) h, w = x[0].shape[row_index], x[0].shape[col_index] transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w) @@ -823,84 +949,112 @@ def zoom_multi(x, zoom_range=(0.9, 1.1), is_random=False, # return x results = [] for data in x: - results.append( apply_transform(data, transform_matrix, channel_index, fill_mode, cval, order)) + results.append(apply_transform(data, transform_matrix, channel_index, fill_mode, cval, order)) return np.asarray(results) + # image = tf.image.random_brightness(image, max_delta=32. / 255.) # image = tf.image.random_saturation(image, lower=0.5, upper=1.5) # image = tf.image.random_hue(image, max_delta=0.032) # image = tf.image.random_contrast(image, lower=0.5, upper=1.5) -# brightness + def brightness(x, gamma=1, gain=1, is_random=False): """Change the brightness of a single image, randomly or non-randomly. Parameters ----------- - x : numpy array + x : numpy.array An image with dimension of [row, col, channel] (default). - gamma : float, small than 1 means brighter. - Non negative real number. Default value is 1, smaller means brighter. - - - If is_random is True, gamma in a range of (1-gamma, 1+gamma). + gamma : float + Non negative real number. Default value is 1. + - Small than 1 means brighter. + - If `is_random` is True, gamma in a range of (1-gamma, 1+gamma). gain : float The constant multiplier. Default value is 1. - is_random : boolean, default False - - If True, randomly change brightness. + is_random : boolean + If True, randomly change brightness. Default is False. + + Returns + ------- + numpy.array + A processed image. References ----------- - - `skimage.exposure.adjust_gamma `_ - - `chinese blog `_ + - `skimage.exposure.adjust_gamma `__ + - `chinese blog `__ + """ if is_random: - gamma = np.random.uniform(1-gamma, 1+gamma) + gamma = np.random.uniform(1 - gamma, 1 + gamma) x = exposure.adjust_gamma(x, gamma, gain) return x + def brightness_multi(x, gamma=1, gain=1, is_random=False): """Change the brightness of multiply images, randomly or non-randomly. Usually be used for image segmentation which x=[X, Y], X and Y should be matched. Parameters ----------- - x : list of numpy array + x : list of numpyarray List of images with dimension of [n_images, row, col, channel] (default). - others : see ``brightness``. + others : args + See ``tl.prepro.brightness``. + + Returns + ------- + numpy.array + A list of processed images. + """ if is_random: - gamma = np.random.uniform(1-gamma, 1+gamma) + gamma = np.random.uniform(1 - gamma, 1 + gamma) results = [] for data in x: - results.append( exposure.adjust_gamma(data, gamma, gain) ) + results.append(exposure.adjust_gamma(data, gamma, gain)) return np.asarray(results) -# illumination + def illumination(x, gamma=1., contrast=1., saturation=1., is_random=False): """Perform illumination augmentation for a single image, randomly or non-randomly. Parameters ----------- - x : numpy array - an image with dimension of [row, col, channel] (default). 
-    gamma : change brightness (the same with ``tl.prepro.brightness``)
-        - if is_random=False, one float number, small than one means brighter, greater than one means darker.
-        - if is_random=True, tuple of two float numbers, (min, max).
-    contrast : change contrast
-        - if is_random=False, one float number, small than one means blur.
-        - if is_random=True, tuple of two float numbers, (min, max).
-    saturation : change saturation
-        - if is_random=False, one float number, small than one means unsaturation.
-        - if is_random=True, tuple of two float numbers, (min, max).
-    is_random : whether the parameters are randomly set.
+    x : numpy.array
+        An image with dimension of [row, col, channel] (default).
+    gamma : float
+        Change brightness (the same as ``tl.prepro.brightness``).
+        - if is_random=False, one float number, smaller than one means brighter, greater than one means darker.
+        - if is_random=True, tuple of two float numbers, (min, max).
+    contrast : float
+        Change contrast.
+        - if is_random=False, one float number, smaller than one means lower contrast.
+        - if is_random=True, tuple of two float numbers, (min, max).
+    saturation : float
+        Change saturation.
+        - if is_random=False, one float number, smaller than one means less saturated.
+        - if is_random=True, tuple of two float numbers, (min, max).
+    is_random : boolean
+        If True, randomly change illumination. Default is False.
+
+    Returns
+    -------
+    numpy.array
+        A processed image.

    Examples
    ---------
-    - Random
+    Random
+
    >>> x = illumination(x, gamma=(0.5, 5.0), contrast=(0.3, 1.0), saturation=(0.7, 1.0), is_random=True)
-    - Non-random
+
+    Non-random
+
    >>> x = illumination(x, 0.5, 0.6, 0.8, is_random=False)
+
+    >>> x = tl.prepro.illumination(x, gamma=(0.5, 5.0), contrast=(0.3, 1.0), saturation=(0.7, 1.0), is_random=True)
+
+    >>> x = tl.prepro.illumination(x, 0.5, 0.6, 0.8, is_random=False)
+
    """
    from PIL import Image, ImageEnhance

@@ -910,42 +1064,49 @@ def illumination(x, gamma=1., contrast=1., saturation=1., is_random=False):
        except:
            raise Exception("if is_random = True, the arguments are (min, max)")
        ## random change brightness # small --> brighter
-        illum_settings = np.random.randint(0,3) # 0-brighter, 1-darker, 2 keep normal
+        illum_settings = np.random.randint(0, 3)  # 0-brighter, 1-darker, 2 keep normal

-        if illum_settings == 0: # brighter
-            gamma = np.random.uniform(gamma[0], 1.0) # (.5, 1.0)
-        elif illum_settings == 1: # darker
-            gamma = np.random.uniform(1.0, gamma[1])# (1.0, 5.0)
+        if illum_settings == 0:  # brighter
+            gamma = np.random.uniform(gamma[0], 1.0)  # (.5, 1.0)
+        elif illum_settings == 1:  # darker
+            gamma = np.random.uniform(1.0, gamma[1])  # (1.0, 5.0)
        else:
            gamma = 1
        im_ = brightness(x, gamma=gamma, gain=1, is_random=False)

-        # print("using contrast and saturation")
-        image = Image.fromarray(im_) # array -> PIL
+        # logging.info("using contrast and saturation")
+        image = Image.fromarray(im_)  # array -> PIL
        contrast_adjust = ImageEnhance.Contrast(image)
-        image = contrast_adjust.enhance(np.random.uniform(contrast[0], contrast[1]))#0.3,0.9))
+        image = contrast_adjust.enhance(np.random.uniform(contrast[0], contrast[1]))  #0.3,0.9))

        saturation_adjust = ImageEnhance.Color(image)
-        image = saturation_adjust.enhance(np.random.uniform(saturation[0], saturation[1]))# (0.7,1.0))
+        image = saturation_adjust.enhance(np.random.uniform(saturation[0], saturation[1]))  # (0.7,1.0))
-        im_ = np.array(image) # PIL -> array
+        im_ = np.array(image)  # PIL -> array

    else:
        im_ = brightness(x, gamma=gamma, gain=1, is_random=False)
-        image = Image.fromarray(im_) # 
array -> PIL + image = Image.fromarray(im_) # array -> PIL contrast_adjust = ImageEnhance.Contrast(image) image = contrast_adjust.enhance(contrast) saturation_adjust = ImageEnhance.Color(image) image = saturation_adjust.enhance(saturation) - im_ = np.array(image) # PIL -> array + im_ = np.array(image) # PIL -> array return np.asarray(im_) -# hue + def rgb_to_hsv(rgb): - """ Input RGB image [0~255] return HSV image [0~1]. + """Input RGB image [0~255] return HSV image [0~1]. Parameters - ------------- - rgb : should be a numpy arrays with values between 0 and 255. + ------------ + rgb : numpy.array + An image with values between 0 and 255. + + Returns + ------- + numpy.array + A processed image. + """ # Translated from source of colorsys.rgb_to_hsv # r,g,b should be a numpy arrays with values between 0 and 255 @@ -966,17 +1127,23 @@ def rgb_to_hsv(rgb): rc[mask] = (maxc - r)[mask] / (maxc - minc)[mask] gc[mask] = (maxc - g)[mask] / (maxc - minc)[mask] bc[mask] = (maxc - b)[mask] / (maxc - minc)[mask] - hsv[..., 0] = np.select( - [r == maxc, g == maxc], [bc - gc, 2.0 + rc - bc], default=4.0 + gc - rc) + hsv[..., 0] = np.select([r == maxc, g == maxc], [bc - gc, 2.0 + rc - bc], default=4.0 + gc - rc) hsv[..., 0] = (hsv[..., 0] / 6.0) % 1.0 return hsv + def hsv_to_rgb(hsv): - """ Input HSV image [0~1] return RGB image [0~255]. + """Input HSV image [0~1] return RGB image [0~255]. Parameters ------------- - hsv : should be a numpy arrays with values between 0.0 and 1.0 + hsv : numpy.array + An image with values between 0.0 and 1.0 + + Returns + ------- + numpy.array + A processed image. """ # Translated from source of colorsys.hsv_to_rgb # h,s should be a numpy arrays with values between 0.0 and 1.0 @@ -999,45 +1166,59 @@ def hsv_to_rgb(hsv): def adjust_hue(im, hout=0.66, is_offset=True, is_clip=True, is_random=False): - """ Adjust hue of an RGB image. This is a convenience method that converts an RGB image to float representation, converts it to HSV, add an offset to the hue channel, converts back to RGB and then back to the original data type. - For TF, see `tf.image.adjust_hue `_ and `tf.image.random_hue `_. + """Adjust hue of an RGB image. + + This is a convenience method that converts an RGB image to float representation, converts it to HSV, add an offset to the hue channel, converts back to RGB and then back to the original data type. + For TF, see `tf.image.adjust_hue `__.and `tf.image.random_hue `__. Parameters ----------- - im : should be a numpy arrays with values between 0 and 255. - hout : float. - - If is_offset is False, set all hue values to this value. 0 is red; 0.33 is green; 0.66 is blue. - - If is_offset is True, add this value as the offset to the hue channel. - is_offset : boolean, default True. - is_clip : boolean, default True. - - If True, set negative hue values to 0. - is_random : boolean, default False. + im : numpy.array + An image with values between 0 and 255. + hout : float + The scale value for adjusting hue. + - If is_offset is False, set all hue values to this value. 0 is red; 0.33 is green; 0.66 is blue. + - If is_offset is True, add this value as the offset to the hue channel. + is_offset : boolean + Whether `hout` is added on HSV as offset or not. Default is True. + is_clip : boolean + If HSV value smaller than 0, set to 0. Default is True. + is_random : boolean + If True, randomly change hue. Default is False. + + Returns + ------- + numpy.array + A processed image. 
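+
+    Notes
+    -----
+    A rough sketch of the arithmetic: hue lives in [0, 1] here (0 is red,
+    0.33 is green, 0.66 is blue), so with ``is_offset=True`` a red pixel
+    (hue 0.0) shifted by ``hout=0.66`` comes out blue (hue 0.66).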
Examples --------- - - Random, add a random value between -0.2 and 0.2 as the offset to every hue values. + Random, add a random value between -0.2 and 0.2 as the offset to every hue values. + >>> im_hue = tl.prepro.adjust_hue(image, hout=0.2, is_offset=True, is_random=False) - - Non-random, make all hue to green. + Non-random, make all hue to green. + >>> im_green = tl.prepro.adjust_hue(image, hout=0.66, is_offset=False, is_random=False) References ----------- - - `tf.image.random_hue `_. - - `tf.image.adjust_hue `_. - - `StackOverflow: Changing image hue with python PIL `_. + - `tf.image.random_hue `__. + - `tf.image.adjust_hue `__. + - `StackOverflow: Changing image hue with python PIL `__. + """ hsv = rgb_to_hsv(im) if is_random: hout = np.random.uniform(-hout, hout) if is_offset: - hsv[...,0] += hout + hsv[..., 0] += hout else: - hsv[...,0] = hout + hsv[..., 0] = hout if is_clip: - hsv[...,0] = np.clip(hsv[...,0], 0, np.inf) # Hao : can remove green dots + hsv[..., 0] = np.clip(hsv[..., 0], 0, np.inf) # Hao : can remove green dots rgb = hsv_to_rgb(hsv) return rgb @@ -1053,36 +1234,36 @@ def adjust_hue(im, hout=0.66, is_offset=True, is_clip=True, is_random=False): # #TODO # pass -# resize + def imresize(x, size=[100, 100], interp='bicubic', mode=None): - """Resize an image by given output size and method. Warning, this function - will rescale the value to [0, 255]. + """Resize an image by given output size and method. + + Warning, this function will rescale the value to [0, 255]. Parameters ----------- - x : numpy array + x : numpy.array An image with dimension of [row, col, channel] (default). - size : int, float or tuple (h, w) - - int, Percentage of current size. - - float, Fraction of current size. - - tuple, Size of the output image. - interp : str, optional - Interpolation to use for re-sizing (‘nearest’, ‘lanczos’, ‘bilinear’, ‘bicubic’ or ‘cubic’). - mode : str, optional - The PIL image mode (‘P’, ‘L’, etc.) to convert arr before resizing. + size : list of 2 int + For height and width. + interp : str + Interpolation method for re-sizing (`nearest`, `lanczos`, `bilinear`, `bicubic` (default) or `cubic`). + mode : str + The PIL image mode (`P`, `L`, etc.) to convert arr before resizing. Returns - -------- - imresize : ndarray - The resized array of image. + ------- + numpy.array + A processed image. References ------------ - - `scipy.misc.imresize `_ + - `scipy.misc.imresize `__ + """ if x.shape[-1] == 1: # greyscale - x = scipy.misc.imresize(x[:,:,0], size, interp=interp, mode=mode) + x = scipy.misc.imresize(x[:, :, 0], size, interp=interp, mode=mode) return x[:, :, np.newaxis] elif x.shape[-1] == 3: # rgb, bgr .. @@ -1090,24 +1271,35 @@ def imresize(x, size=[100, 100], interp='bicubic', mode=None): else: raise Exception("Unsupported channel %d" % x.shape[-1]) + # value scale def pixel_value_scale(im, val=0.9, clip=[], is_random=False): """Scales each value in the pixels of the image. Parameters ----------- - im : numpy array for one image. - val : float. - - If is_random=False, multiply this value with all pixels. - - If is_random=True, multiply a value between [1-val, 1+val] with all pixels. + im : numpy.array + An image. + val : float + The scale value for changing pixel value. + - If is_random=False, multiply this value with all pixels. + - If is_random=True, multiply a value between [1-val, 1+val] with all pixels. + + Returns + ------- + numpy.array + A processed image. 
    Examples
    ----------
-    - Random
+    Random
+
    >>> im = pixel_value_scale(im, 0.1, [0, 255], is_random=True)
-    - Non-random
+
+    Non-random
+
    >>> im = pixel_value_scale(im, 0.9, [0, 255], is_random=False)
+
    """
    if is_random:
        scale = 1 + np.random.uniform(-val, val)
@@ -1120,20 +1312,28 @@
    return im


+
# normailization
-def samplewise_norm(x, rescale=None, samplewise_center=False, samplewise_std_normalization=False,
-        channel_index=2, epsilon=1e-7):
+def samplewise_norm(x, rescale=None, samplewise_center=False, samplewise_std_normalization=False, channel_index=2, epsilon=1e-7):
    """Normalize an image by rescale, samplewise centering and samplewise centering in order.

    Parameters
    -----------
-    x : numpy array
+    x : numpy.array
        An image with dimension of [row, col, channel] (default).
-    rescale : rescaling factor.
-        If None or 0, no rescaling is applied, otherwise we multiply the data by the value provided (before applying any other transformation)
-    samplewise_center : set each sample mean to 0.
-    samplewise_std_normalization : divide each input by its std.
-    epsilon : small position value for dividing standard deviation.
+    rescale : float
+        Rescaling factor. If None or 0, no rescaling is applied, otherwise we multiply the data by the value provided (before applying any other transformation)
+    samplewise_center : boolean
+        If True, set each sample mean to 0.
+    samplewise_std_normalization : boolean
+        If True, divide each input by its std.
+    epsilon : float
+        A small positive value for dividing standard deviation.
+
+    Returns
+    -------
+    numpy.array
+        A processed image.

    Examples
    --------
@@ -1144,9 +1344,9 @@ def samplewise_norm(x, rescale=None, samplewise_center=False, samplewise_std_nor
    Notes
    ------
    When samplewise_center and samplewise_std_normalization are True.
-
    - For greyscale image, every pixels are subtracted and divided by the mean and std of whole image.
    - For RGB image, every pixels are subtracted and divided by the mean and std of this pixel i.e. the mean and std of a pixel is 0 and 1.
+
    """
    if rescale:
        x *= rescale
@@ -1168,17 +1368,27 @@ def samplewise_norm(x, rescale=None, samplewise_center=False, samplewise_std_nor
    else:
        raise Exception("Unsupported channels %d" % x.shape[channel_index])


+
def featurewise_norm(x, mean=None, std=None, epsilon=1e-7):
    """Normalize every pixels by the same given mean and std, which are usually compute from all examples.

    Parameters
    -----------
-    x : numpy array
+    x : numpy.array
        An image with dimension of [row, col, channel] (default).
-    mean : value for subtraction.
-    std : value for division.
-    epsilon : small position value for dividing standard deviation.
+    mean : float
+        Value for subtraction.
+    std : float
+        Value for division.
+    epsilon : float
+        A small positive value for dividing standard deviation.
+
+    Returns
+    -------
+    numpy.array
+        A processed image.
+
    """
    if mean:
        x = x - mean
@@ -1186,42 +1396,58 @@ def featurewise_norm(x, mean=None, std=None, epsilon=1e-7):
        x = x / (std + epsilon)
    return x


+
# whitening
def get_zca_whitening_principal_components_img(X):
    """Return the ZCA whitening principal components matrix.

    Parameters
    -----------
-    x : numpy array
-        Batch of image with dimension of [n_example, row, col, channel] (default).
+    X : numpy.array
+        Batch of images with dimension of [n_example, row, col, channel] (default).
+
+    Returns
+    -------
+    numpy.array
+        The ZCA whitening principal components matrix.
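+
+    Examples
+    --------
+    An illustrative pipeline (assuming ``X`` is a batch of images and ``x`` a single image):
+
+    >>> pc = tl.prepro.get_zca_whitening_principal_components_img(X)
+    >>> x = tl.prepro.zca_whitening(x, pc)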
+ """ flatX = np.reshape(X, (X.shape[0], X.shape[1] * X.shape[2] * X.shape[3])) - print("zca : computing sigma ..") + logging.info("zca : computing sigma ..") sigma = np.dot(flatX.T, flatX) / flatX.shape[0] - print("zca : computing U, S and V ..") + logging.info("zca : computing U, S and V ..") U, S, V = linalg.svd(sigma) - print("zca : computing principal components ..") + logging.info("zca : computing principal components ..") principal_components = np.dot(np.dot(U, np.diag(1. / np.sqrt(S + 10e-7))), U.T) return principal_components + def zca_whitening(x, principal_components): """Apply ZCA whitening on an image by given principal components matrix. Parameters ----------- - x : numpy array + x : numpy.array An image with dimension of [row, col, channel] (default). - principal_components : matrix from ``get_zca_whitening_principal_components_img``. + principal_components : matrix + Matrix from ``get_zca_whitening_principal_components_img``. + + Returns + ------- + numpy.array + A processed image. + """ flatx = np.reshape(x, (x.size)) - # print(principal_components.shape, x.shape) # ((28160, 28160), (160, 176, 1)) + # logging.info(principal_components.shape, x.shape) # ((28160, 28160), (160, 176, 1)) # flatx = np.reshape(x, (x.shape)) # flatx = np.reshape(x, (x.shape[0], )) - # print(flatx.shape) # (160, 176, 1) + # logging.info(flatx.shape) # (160, 176, 1) whitex = np.dot(flatx, principal_components) x = np.reshape(whitex, (x.shape[0], x.shape[1], x.shape[2])) return x + # developing # def barrel_transform(x, intensity): # # https://github.com/fchollet/keras/blob/master/keras/preprocessing/image.py @@ -1233,20 +1459,27 @@ def zca_whitening(x, principal_components): # # TODO # pass + # channel shift def channel_shift(x, intensity, is_random=False, channel_index=2): - """Shift the channels of an image, randomly or non-randomly, see `numpy.rollaxis `_. + """Shift the channels of an image, randomly or non-randomly, see `numpy.rollaxis `__. Parameters ----------- - x : numpy array + x : numpy.array An image with dimension of [row, col, channel] (default). intensity : float Intensity of shifting. - is_random : boolean, default False - If True, randomly shift. + is_random : boolean + If True, randomly shift. Default is False. channel_index : int - Index of channel, default 2. + Index of channel. Default is 2. + + Returns + ------- + numpy.array + A processed image. + """ if is_random: factor = np.random.uniform(-intensity, intensity) @@ -1254,10 +1487,9 @@ def channel_shift(x, intensity, is_random=False, channel_index=2): factor = intensity x = np.rollaxis(x, channel_index, 0) min_x, max_x = np.min(x), np.max(x) - channel_images = [np.clip(x_channel + factor, min_x, max_x) - for x_channel in x] + channel_images = [np.clip(x_channel + factor, min_x, max_x) for x_channel in x] x = np.stack(channel_images, axis=0) - x = np.rollaxis(x, 0, channel_index+1) + x = np.rollaxis(x, 0, channel_index + 1) return x # x = np.rollaxis(x, channel_index, 0) # min_x, max_x = np.min(x), np.max(x) @@ -1267,15 +1499,23 @@ def channel_shift(x, intensity, is_random=False, channel_index=2): # x = np.rollaxis(x, 0, channel_index+1) # return x + def channel_shift_multi(x, intensity, is_random=False, channel_index=2): - """Shift the channels of images with the same arguments, randomly or non-randomly, see `numpy.rollaxis `_ . + """Shift the channels of images with the same arguments, randomly or non-randomly, see `numpy.rollaxis `__. Usually be used for image segmentation which x=[X, Y], X and Y should be matched. 
Parameters ----------- - x : list of numpy array + x : list of numpy.array List of images with dimension of [n_images, row, col, channel] (default). - others : see ``channel_shift``. + others : args + See ``tl.prepro.channel_shift``. + + Returns + ------- + numpy.array + A list of processed images. + """ if is_random: factor = np.random.uniform(-intensity, intensity) @@ -1286,66 +1526,80 @@ def channel_shift_multi(x, intensity, is_random=False, channel_index=2): for data in x: data = np.rollaxis(data, channel_index, 0) min_x, max_x = np.min(data), np.max(data) - channel_images = [np.clip(x_channel + factor, min_x, max_x) - for x_channel in x] + channel_images = [np.clip(x_channel + factor, min_x, max_x) for x_channel in x] data = np.stack(channel_images, axis=0) - data = np.rollaxis(x, 0, channel_index+1) - results.append( data ) + data = np.rollaxis(x, 0, channel_index + 1) + results.append(data) return np.asarray(results) + # noise def drop(x, keep=0.5): """Randomly set some pixels to zero by a given keeping probability. Parameters ----------- - x : numpy array + x : numpy.array An image with dimension of [row, col, channel] or [row, col]. - keep : float (0, 1) - The keeping probability, the lower more values will be set to zero. + keep : float + The keeping probability (0, 1), the lower more values will be set to zero. + + Returns + ------- + numpy.array + A processed image. + """ if len(x.shape) == 3: - if x.shape[-1]==3: # color + if x.shape[-1] == 3: # color img_size = x.shape mask = np.random.binomial(n=1, p=keep, size=x.shape[:-1]) for i in range(3): - x[:,:,i] = np.multiply(x[:,:,i] , mask) - elif x.shape[-1]==1: # greyscale image + x[:, :, i] = np.multiply(x[:, :, i], mask) + elif x.shape[-1] == 1: # greyscale image img_size = x.shape - x = np.multiply(x , np.random.binomial(n=1, p=keep, size=img_size)) + x = np.multiply(x, np.random.binomial(n=1, p=keep, size=img_size)) else: raise Exception("Unsupported shape {}".format(x.shape)) - elif len(x.shape) == 2 or 1: # greyscale matrix (image) or vector + elif len(x.shape) == 2 or 1: # greyscale matrix (image) or vector img_size = x.shape - x = np.multiply(x , np.random.binomial(n=1, p=keep, size=img_size)) + x = np.multiply(x, np.random.binomial(n=1, p=keep, size=img_size)) else: raise Exception("Unsupported shape {}".format(x.shape)) return x + # x = np.asarray([[1,2,3,4,5,6,7,8,9,10],[1,2,3,4,5,6,7,8,9,10]]) # x = np.asarray([x,x,x,x,x,x]) # x.shape = 10, 4, 3 -# # print(x) +# # logging.info(x) # # exit() -# print(x.shape) +# logging.info(x.shape) # # exit() -# print(drop(x, keep=1.)) +# logging.info(drop(x, keep=1.)) # exit() + # manual transform def transform_matrix_offset_center(matrix, x, y): """Return transform matrix offset center. Parameters ---------- - matrix : numpy array - Transform matrix - x, y : int + matrix : numpy.array + Transform matrix. + x and y : 2 int Size of image. + Returns + ------- + numpy.array + The transform matrix. + Examples -------- - - See ``rotation``, ``shear``, ``zoom``. + - See ``tl.prepro.rotation``, ``tl.prepro.shear``, ``tl.prepro.zoom``. + """ o_x = float(x) / 2 + 0.5 o_y = float(y) / 2 + 0.5 @@ -1356,121 +1610,140 @@ def transform_matrix_offset_center(matrix, x, y): def apply_transform(x, transform_matrix, channel_index=2, fill_mode='nearest', cval=0., order=1): - """Return transformed images by given transform_matrix from ``transform_matrix_offset_center``. + """Return transformed images by given ``transform_matrix`` from ``transform_matrix_offset_center``. 
Parameters ---------- - x : numpy array + x : numpy.array An image with dimension of [row, col, channel] (default). - transform_matrix : numpy array + transform_matrix : numpy.array Transform matrix (offset center), can be generated by ``transform_matrix_offset_center`` channel_index : int Index of channel, default 2. - fill_mode : string - Method to fill missing pixel, default ‘nearest’, more options ‘constant’, ‘reflect’ or ‘wrap’ - - - `scipy ndimage affine_transform `_ - cval : scalar, optional + fill_mode : str + Method to fill missing pixel, default `nearest`, more options `constant`, `reflect` or `wrap`, see `scipy ndimage affine_transform `__ + cval : float Value used for points outside the boundaries of the input if mode='constant'. Default is 0.0 - order : int, optional + order : int The order of interpolation. The order has to be in the range 0-5: + - 0 Nearest-neighbor + - 1 Bi-linear (default) + - 2 Bi-quadratic + - 3 Bi-cubic + - 4 Bi-quartic + - 5 Bi-quintic + - `scipy ndimage affine_transform `__ - - 0 Nearest-neighbor - - 1 Bi-linear (default) - - 2 Bi-quadratic - - 3 Bi-cubic - - 4 Bi-quartic - - 5 Bi-quintic - - - `scipy ndimage affine_transform `_ + Returns + ------- + numpy.array + A processed image. Examples -------- - - See ``rotation``, ``shift``, ``shear``, ``zoom``. + - See ``tl.prepro.rotation``, ``tl.prepro.shift``, ``tl.prepro.shear``, ``tl.prepro.zoom``. + """ x = np.rollaxis(x, channel_index, 0) final_affine_matrix = transform_matrix[:2, :2] final_offset = transform_matrix[:2, 2] - channel_images = [ndi.interpolation.affine_transform(x_channel, final_affine_matrix, - final_offset, order=order, mode=fill_mode, cval=cval) for x_channel in x] + channel_images = [ + ndi.interpolation.affine_transform(x_channel, final_affine_matrix, final_offset, order=order, mode=fill_mode, cval=cval) for x_channel in x + ] x = np.stack(channel_images, axis=0) - x = np.rollaxis(x, 0, channel_index+1) + x = np.rollaxis(x, 0, channel_index + 1) return x def projective_transform_by_points(x, src, dst, map_args={}, output_shape=None, order=1, mode='constant', cval=0.0, clip=True, preserve_range=False): - """Projective transform by given coordinates, usually 4 coordinates. see `scikit-image `_. + """Projective transform by given coordinates, usually 4 coordinates. + + see `scikit-image `__. Parameters ----------- - x : numpy array + x : numpy.array An image with dimension of [row, col, channel] (default). src : list or numpy The original coordinates, usually 4 coordinates of (width, height). dst : list or numpy The coordinates after transformation, the number of coordinates is the same with src. - map_args : dict, optional - Keyword arguments passed to inverse_map. - output_shape : tuple (rows, cols), optional + map_args : dictionary + Keyword arguments passed to inverse map. + output_shape : tuple of 2 int Shape of the output image generated. By default the shape of the input image is preserved. Note that, even for multi-band images, only rows and columns need to be specified. - order : int, optional + order : int The order of interpolation. The order has to be in the range 0-5: - - - 0 Nearest-neighbor - - 1 Bi-linear (default) - - 2 Bi-quadratic - - 3 Bi-cubic - - 4 Bi-quartic - - 5 Bi-quintic - mode : {‘constant’, ‘edge’, ‘symmetric’, ‘reflect’, ‘wrap’}, optional + - 0 Nearest-neighbor + - 1 Bi-linear (default) + - 2 Bi-quadratic + - 3 Bi-cubic + - 4 Bi-quartic + - 5 Bi-quintic + mode : str + One of `constant` (default), `edge`, `symmetric`, `reflect` or `wrap`. 
Points outside the boundaries of the input are filled according to the given mode. Modes match the behaviour of numpy.pad. - cval : float, optional - Used in conjunction with mode ‘constant’, the value outside the image boundaries. - clip : bool, optional + cval : float + Used in conjunction with mode `constant`, the value outside the image boundaries. + clip : boolean Whether to clip the output to the range of values of the input image. This is enabled by default, since higher order interpolation may produce values outside the given input range. - preserve_range : bool, optional + preserve_range : boolean Whether to keep the original range of values. Otherwise, the input image is converted according to the conventions of img_as_float. + Returns + ------- + numpy.array + A processed image. + Examples -------- - >>> Assume X is an image from CIFAR 10, i.e. shape == (32, 32, 3) + Assume X is an image from CIFAR-10, i.e. shape == (32, 32, 3) + >>> src = [[0,0],[0,32],[32,0],[32,32]] # [w, h] >>> dst = [[10,10],[0,32],[32,0],[32,32]] - >>> x = projective_transform_by_points(X, src, dst) + >>> x = tl.prepro.projective_transform_by_points(X, src, dst) References ----------- - - `scikit-image : geometric transformations `_ - - `scikit-image : examples `_ + - `scikit-image : geometric transformations `__ + - `scikit-image : examples `__ + """ - if type(src) is list: # convert to numpy + if type(src) is list: # convert to numpy src = np.array(src) if type(dst) is list: dst = np.array(dst) - if np.max(x)>1: # convert to [0, 1] - x = x/255 + if np.max(x) > 1: # convert to [0, 1] + x = x / 255 m = transform.ProjectiveTransform() m.estimate(dst, src) - warped = transform.warp(x, m, map_args=map_args, output_shape=output_shape, order=order, mode=mode, cval=cval, clip=clip, preserve_range=preserve_range) + warped = transform.warp(x, m, map_args=map_args, output_shape=output_shape, order=order, mode=mode, cval=cval, clip=clip, preserve_range=preserve_range) return warped + # Numpy and PIL -def array_to_img(x, dim_ordering=(0,1,2), scale=True): +def array_to_img(x, dim_ordering=(0, 1, 2), scale=True): """Converts a numpy array to PIL image object (uint8 format). Parameters ---------- - x : numpy array - A image with dimension of 3 and channels of 1 or 3. - dim_ordering : list or tuple of 3 int + x : numpy.array + An image with dimension of 3 and channels of 1 or 3. + dim_ordering : tuple of 3 int Index of row, col and channel, default (0, 1, 2), for theano (1, 2, 0). - scale : boolean, default is True - If True, converts image to [0, 255] from any range of value like [-1, 2]. + scale : boolean + If True, converts image to [0, 255] from any range of value like [-1, 2]. Default is True. + + Returns + ------- + PIL.image + An image. References ----------- - - `PIL Image.fromarray `_ + `PIL Image.fromarray `__ + """ from PIL import Image # if dim_ordering == 'default': @@ -1482,7 +1755,7 @@ def array_to_img(x, dim_ordering=(0,1,2), scale=True): x += max(-np.min(x), 0) x_max = np.max(x) if x_max != 0: - # print(x_max) + # logging.info(x_max) # x /= x_max x = x / x_max x *= 255 @@ -1496,61 +1769,97 @@ def array_to_img(x, dim_ordering=(0,1,2), scale=True): raise Exception('Unsupported channel number: ', x.shape[2]) - - def find_contours(x, level=0.8, fully_connected='low', positive_orientation='low'): - """ Find iso-valued contours in a 2D array for a given level value, returns list of (n, 2)-ndarrays - see `skimage.measure.find_contours `_ . 
+ """Find iso-valued contours in a 2D array for a given level value, returns list of (n, 2)-ndarrays + see `skimage.measure.find_contours `__. Parameters ------------ - x : 2D ndarray of double. Input data in which to find contours. - level : float. Value along which to find contours in the array. - fully_connected : str, {‘low’, ‘high’}. Indicates whether array elements below the given level value are to be considered fully-connected (and hence elements above the value will only be face connected), or vice-versa. (See notes below for details.) - positive_orientation : either ‘low’ or ‘high’. Indicates whether the output contours will produce positively-oriented polygons around islands of low- or high-valued elements. If ‘low’ then contours will wind counter-clockwise around elements below the iso-value. Alternately, this means that low-valued elements are always on the left of the contour. + x : 2D ndarray of double. + Input data in which to find contours. + level : float + Value along which to find contours in the array. + fully_connected : str + Either `low` or `high`. Indicates whether array elements below the given level value are to be considered fully-connected (and hence elements above the value will only be face connected), or vice-versa. (See notes below for details.) + positive_orientation : str + Either `low` or `high`. Indicates whether the output contours will produce positively-oriented polygons around islands of low- or high-valued elements. If `low` then contours will wind counter-clockwise around elements below the iso-value. Alternately, this means that low-valued elements are always on the left of the contour. + + Returns + -------- + list of (n,2)-ndarrays + Each contour is an ndarray of shape (n, 2), consisting of n (row, column) coordinates along the contour. + """ - return skimage.measure.find_contours(x, level, fully_connected='low', positive_orientation='low') + return skimage.measure.find_contours(x, level, fully_connected=fully_connected, positive_orientation=positive_orientation) + def pt2map(list_points=[], size=(100, 100), val=1): - """ Inputs a list of points, return a 2D image. + """Inputs a list of points, return a 2D image. Parameters -------------- - list_points : list of [x, y]. - size : tuple of (w, h) for output size. - val : float or int for the contour value. + list_points : list of 2 int + [x, y] for point coordinates. + size : tuple of 2 int + (w, h) for output size. + val : float or int + For the contour value. + + Returns + ------- + numpy.array + An image. + """ i_m = np.zeros(size) - if list_points == []: + if len(list_points) == 0: return i_m for xx in list_points: for x in xx: - # print(x) + # logging.info(x) i_m[int(np.round(x[0]))][int(np.round(x[1]))] = val return i_m + def binary_dilation(x, radius=3): - """ Return fast binary morphological dilation of an image. - see `skimage.morphology.binary_dilation `_. + """Return fast binary morphological dilation of an image. + see `skimage.morphology.binary_dilation `__. Parameters ----------- - x : 2D array image. - radius : int for the radius of mask. + x : 2D array + A binary image. + radius : int + For the radius of mask. + + Returns + ------- + numpy.array + A processed binary image. + """ from skimage.morphology import disk, binary_dilation mask = disk(radius) x = binary_dilation(x, selem=mask) return x + def dilation(x, radius=3): - """ Return greyscale morphological dilation of an image, - see `skimage.morphology.dilation `_. 
+ """Return greyscale morphological dilation of an image, + see `skimage.morphology.dilation `__. Parameters ----------- - x : 2D array image. - radius : int for the radius of mask. + x : 2D array + An greyscale image. + radius : int + For the radius of mask. + + Returns + ------- + numpy.array + A processed greyscale image. + """ from skimage.morphology import disk, dilation mask = disk(radius) @@ -1559,27 +1868,44 @@ def dilation(x, radius=3): def binary_erosion(x, radius=3): - """ Return binary morphological erosion of an image, - see `skimage.morphology.binary_erosion `_. + """Return binary morphological erosion of an image, + see `skimage.morphology.binary_erosion `__. Parameters ----------- - x : 2D array image. - radius : int for the radius of mask. + x : 2D array + A binary image. + radius : int + For the radius of mask. + + Returns + ------- + numpy.array + A processed binary image. + """ from skimage.morphology import disk, dilation, binary_erosion mask = disk(radius) x = binary_erosion(x, selem=mask) return x + def erosion(x, radius=3): - """ Return greyscale morphological erosion of an image, - see `skimage.morphology.erosion `_. + """Return greyscale morphological erosion of an image, + see `skimage.morphology.erosion `__. Parameters ----------- - x : 2D array image. - radius : int for the radius of mask. + x : 2D array + A greyscale image. + radius : int + For the radius of mask. + + Returns + ------- + numpy.array + A processed greyscale image. + """ from skimage.morphology import disk, dilation, erosion mask = disk(radius) @@ -1587,16 +1913,21 @@ def erosion(x, radius=3): return x - -## Object Detection - def obj_box_coords_rescale(coords=[], shape=[100, 200]): """Scale down a list of coordinates from pixel unit to the ratio of image size i.e. in the range of [0, 1]. Parameters ------------ - coords : list of list for coordinates [[x, y, w, h], [x, y, w, h], ...] - shape : list of 2 integers for [height, width] of the image. + coords : list of list for 4 int + For coordinates of more than one images .e.g.[[x, y, w, h], [x, y, w, h], ...]. + shape : list of 2 int + 【height, width]. + + Returns + ------- + list of list of 4 numbers + A list of new bounding boxes. + Examples --------- @@ -1609,9 +1940,15 @@ def obj_box_coords_rescale(coords=[], shape=[100, 200]): >>> coords = obj_box_coords_rescale(coords=[[30, 40, 50, 50]], shape=[100, 200]) >>> print(coords) ... [[0.15, 0.4, 0.25, 0.5]] + + Returns + ------- + list of 4 numbers + New coordinates. + """ imh, imw = shape[0], shape[1] - imh = imh * 1.0 # * 1.0 for python2 : force division to be float point + imh = imh * 1.0 # * 1.0 for python2 : force division to be float point imw = imw * 1.0 coords_new = list() for coord in coords: @@ -1623,70 +1960,99 @@ def obj_box_coords_rescale(coords=[], shape=[100, 200]): coords_new.append([x, y, w, h]) return coords_new + def obj_box_coord_rescale(coord=[], shape=[100, 200]): """Scale down one coordinates from pixel unit to the ratio of image size i.e. in the range of [0, 1]. It is the reverse process of ``obj_box_coord_scale_to_pixelunit``. Parameters ------------ - coords : list of list for coordinates [[x, y, w, h], [x, y, w, h], ...] - shape : list of 2 integers for [height, width] of the image. + coords : list of 4 int + One coordinates of one image e.g. [x, y, w, h]. + shape : list of 2 int + For [height, width]. + + Returns + ------- + list of 4 numbers + New bounding box. Examples --------- - >>> coord = obj_box_coord_rescale(coord=[30, 40, 50, 50], shape=[100, 100]) - ... 
+    >>> coord = tl.prepro.obj_box_coord_rescale(coord=[30, 40, 50, 50], shape=[100, 100])
+    ...   [0.3, 0.4, 0.5, 0.5]
+
    """
    return obj_box_coords_rescale(coords=[coord], shape=shape)[0]

-# coord = obj_box_coord_rescale(coord=[30, 40, 50, 50], shape=[100, 100])
-# print(coord)  #[[0.15, 0.4, 0.25, 0.5]]
-# exit()

-def obj_box_coord_scale_to_pixelunit(coord, shape=(100, 100, 3)):
-    """ Convert one coordinate [x, y, w (or x2), h (or y2)] in ratio format to image coordinate format.
+def obj_box_coord_scale_to_pixelunit(coord, shape=(100, 100)):
+    """Convert one coordinate [x, y, w (or x2), h (or y2)] in ratio format to image coordinate format.
    It is the reverse process of ``obj_box_coord_rescale``.

    Parameters
    -----------
-    coord : list of float, [x, y, w (or x2), h (or y2)] in ratio format, i.e value range [0~1].
-    shape : tuple of (height, width, channel (optional))
+    coord : list of 4 float
+        One coordinate of one image [x, y, w (or x2), h (or y2)] in ratio format, i.e. value range [0~1].
+    shape : tuple of 2 int
+        For [height, width].
+
+    Returns
+    -------
+    list of 4 numbers
+        New bounding box.

    Examples
    ---------
-    >>> x, y, x2, y2 = obj_box_coord_scale_to_pixelunit([0.2, 0.3, 0.5, 0.7], shape=(100, 200, 3))
-    ... (40, 30, 100, 70)
+    >>> x, y, x2, y2 = tl.prepro.obj_box_coord_scale_to_pixelunit([0.2, 0.3, 0.5, 0.7], shape=(100, 200))
+    ... [40, 30, 100, 70]
+
    """
    imh, imw = shape[0:2]
-    x = int(coord[0]*imw)
-    x2 = int(coord[2]*imw)
-    y = int(coord[1]*imh)
-    y2 = int(coord[3]*imh)
+    x = int(coord[0] * imw)
+    x2 = int(coord[2] * imw)
+    y = int(coord[1] * imh)
+    y2 = int(coord[3] * imh)
    return [x, y, x2, y2]

+
 # coords = obj_box_coords_rescale(coords=[[30, 40, 50, 50], [10, 10, 20, 20]], shape=[100, 100])
-# print(coords)
+# logging.info(coords)
# # ... [[0.3, 0.4, 0.5, 0.5], [0.1, 0.1, 0.2, 0.2]]
# coords = obj_box_coords_rescale(coords=[[30, 40, 50, 50]], shape=[50, 100])
-# print(coords)
+# logging.info(coords)
# # ... [[0.3, 0.8, 0.5, 1.0]]
# coords = obj_box_coords_rescale(coords=[[30, 40, 50, 50]], shape=[100, 200])
-# print(coords)
+# logging.info(coords)
# # ... [[0.15, 0.4, 0.25, 0.5]]
# exit()

+
 def obj_box_coord_centroid_to_upleft_butright(coord, to_int=False):
-    """ Convert one coordinate [x_center, y_center, w, h] to [x1, y1, x2, y2] in up-left and botton-right format.
+    """Convert one coordinate [x_center, y_center, w, h] to [x1, y1, x2, y2] in up-left and bottom-right format.
+
+    Parameters
+    ------------
+    coord : list of 4 int/float
+        One coordinate.
+    to_int : boolean
+        Whether to convert the output values to integers.
+
+    Returns
+    -------
+    list of 4 numbers
+        New bounding box.

    Examples
    ---------
    >>> coord = obj_box_coord_centroid_to_upleft_butright([30, 40, 20, 20])
    ... [20, 30, 40, 50]
+
    """
-    assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]"
+    assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]"
    x_center, y_center, w, h = coord
-    x = x_center - w / 2.
-    y = y_center - h / 2.
+    x = x_center - w / 2.
+    y = y_center - h / 2.
    x2 = x + w
    y2 = y + h
    if to_int:
@@ -1694,15 +2060,28 @@ def obj_box_coord_centroid_to_upleft_butright(coord, to_int=False):
    else:
        return [x, y, x2, y2]

+
 # coord = obj_box_coord_centroid_to_upleft_butright([30, 40, 20, 20])
-# print(coord)    [20, 30, 40, 50]
+# logging.info(coord)    [20, 30, 40, 50]
# exit()

+
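# [Editor's note] A quick, illustrative check that ``obj_box_coord_rescale`` and
# ``obj_box_coord_scale_to_pixelunit`` above are inverses of each other, up to
# integer rounding; values mirror the docstring examples.
import tensorlayer as tl

ratio_box = tl.prepro.obj_box_coord_rescale(coord=[30, 40, 50, 50], shape=[100, 100])
# ratio_box == [0.3, 0.4, 0.5, 0.5]
pixel_box = tl.prepro.obj_box_coord_scale_to_pixelunit(ratio_box, shape=(100, 100))
# pixel_box == [30, 40, 50, 50]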
+ """Convert one coordinate [x1, y1, x2, y2] to [x_center, y_center, w, h]. It is the reverse process of ``obj_box_coord_centroid_to_upleft_butright``. + + Parameters + ------------ + coord : list of 4 int/float + One coordinate. + + Returns + ------- + list of 4 numbers + New bounding box. + """ - assert len(coord) == 4, "coordinate should be 4 values : [x1, y1, x2, y2]" + assert len(coord) == 4, "coordinate should be 4 values : [x1, y1, x2, y2]" x1, y1, x2, y2 = coord w = x2 - x1 h = y2 - y1 @@ -1712,32 +2091,66 @@ def obj_box_coord_upleft_butright_to_centroid(coord): def obj_box_coord_centroid_to_upleft(coord): - """ Convert one coordinate [x_center, y_center, w, h] to [x, y, w, h]. + """Convert one coordinate [x_center, y_center, w, h] to [x, y, w, h]. It is the reverse process of ``obj_box_coord_upleft_to_centroid``. + + Parameters + ------------ + coord : list of 4 int/float + One coordinate. + + Returns + ------- + list of 4 numbers + New bounding box. + """ - assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" + assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" x_center, y_center, w, h = coord - x = x_center - w / 2. - y = y_center - h / 2. + x = x_center - w / 2. + y = y_center - h / 2. return [x, y, w, h] + def obj_box_coord_upleft_to_centroid(coord): - """ Convert one coordinate [x, y, w, h] to [x_center, y_center, w, h]. + """Convert one coordinate [x, y, w, h] to [x_center, y_center, w, h]. It is the reverse process of ``obj_box_coord_centroid_to_upleft``. + + Parameters + ------------ + coord : list of 4 int/float + One coordinate. + + Returns + ------- + list of 4 numbers + New bounding box. + """ - assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" + assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" x, y, w, h = coord x_center = x + w / 2. y_center = y + h / 2. return [x_center, y_center, w, h] -## -def parse_darknet_ann_str_to_list(annotation): - """ Input string format of class, x, y, w, h, return list of list format. + +def parse_darknet_ann_str_to_list(annotations): + """Input string format of class, x, y, w, h, return list of list format. + + Parameters + ----------- + annotations : str + The annotations in darkent format "class, x, y, w, h ...." seperated by "\\n". + + Returns + ------- + list of list of 4 numbers + List of bounding box. + """ - annotation = annotation.split("\n") + annotations = annotations.split("\n") ann = [] - for a in annotation: + for a in annotations: a = a.split() if len(a) == 5: for i in range(len(a)): @@ -1748,14 +2161,31 @@ def parse_darknet_ann_str_to_list(annotation): ann.append(a) return ann -def parse_darknet_ann_list_to_cls_box(annotation): - """ Input list of [[class, x, y, w, h], ...], return two list of [class ...] and [[x, y, w, h], ...]. + +def parse_darknet_ann_list_to_cls_box(annotations): + """Parse darknet annotation format into two lists for class and bounding box. + + Input list of [[class, x, y, w, h], ...], return two list of [class ...] and [[x, y, w, h], ...]. + + Parameters + ------------ + annotations : list of list + A list of class and bounding boxes of images e.g. [[class, x, y, w, h], ...] + + Returns + ------- + list of int + List of class labels. + + list of list of 4 numbers + List of bounding box. 
+ """ class_list = [] bbox_list = [] - for i in range(len(annotation)): - class_list.append( annotation[i][0] ) - bbox_list.append( annotation[i][1:] ) + for i in range(len(annotations)): + class_list.append(annotations[i][0]) + bbox_list.append(annotations[i][1:]) return class_list, bbox_list @@ -1764,15 +2194,23 @@ def obj_box_left_right_flip(im, coords=[], is_rescale=False, is_center=False, is Parameters ---------- - im : numpy array + im : numpy.array An image with dimension of [row, col, channel] (default). - coords : list of list for coordinates [[x, y, w, h], [x, y, w, h], ...] - is_rescale : boolean, default False - Set to True, if the input coordinates are rescaled to [0, 1]. - is_center : boolean, default False - Set to True, if the x and y of coordinates are the centroid. (i.e. darknet format) - is_random : boolean, default False - If True, randomly flip. + coords : list of list of 4 int/float + Coordinates [[x, y, w, h], [x, y, w, h], ...]. + is_rescale : boolean + Set to True, if the input coordinates are rescaled to [0, 1]. Default is False. + is_center : boolean + Set to True, if the x and y of coordinates are the centroid (i.e. darknet format). Default is False. + is_random : boolean + If True, randomly flip. Default is False. + + Returns + ------- + numpy.array + A processed image + list of list of 4 numbers + A list of new bounding boxes. Examples -------- @@ -1789,7 +2227,9 @@ def obj_box_left_right_flip(im, coords=[], is_rescale=False, is_center=False, is >>> im, coords = obj_box_left_right_flip(im, coords=[[20, 40, 30, 30]], is_rescale=False, is_center=False, is_random=False) >>> print(coords) ... [[50, 40, 30, 30]] + """ + def _flip(im, coords): im = flip_axis(im, axis=1, is_random=False) coords_new = list() @@ -1822,32 +2262,43 @@ def _flip(im, coords): else: return _flip(im, coords) + # im = np.zeros([80, 100]) # as an image with shape width=100, height=80 # im, coords = obj_box_left_right_flip(im, coords=[[0.2, 0.4, 0.3, 0.3], [0.1, 0.5, 0.2, 0.3]], is_rescale=True, is_center=True, is_random=False) -# print(coords) +# logging.info(coords) # # ... [[0.8, 0.4, 0.3, 0.3], [0.9, 0.5, 0.2, 0.3]] # im, coords = obj_box_left_right_flip(im, coords=[[0.2, 0.4, 0.3, 0.3]], is_rescale=True, is_center=False, is_random=False) -# print(coords) +# logging.info(coords) # # [[0.5, 0.4, 0.3, 0.3]] # im, coords = obj_box_left_right_flip(im, coords=[[20, 40, 30, 30]], is_rescale=False, is_center=True, is_random=False) -# print(coords) +# logging.info(coords) # # ... [[80, 40, 30, 30]] # im, coords = obj_box_left_right_flip(im, coords=[[20, 40, 30, 30]], is_rescale=False, is_center=False, is_random=False) -# print(coords) +# logging.info(coords) # # [[50, 40, 30, 30]] # exit() + def obj_box_imresize(im, coords=[], size=[100, 100], interp='bicubic', mode=None, is_rescale=False): """Resize an image, and compute the new bounding box coordinates. Parameters ------------- - im : numpy array + im : numpy.array An image with dimension of [row, col, channel] (default). - coords : list of list for coordinates [[x, y, w, h], [x, y, w, h], ...] - size, interp, mode : see ``tl.prepro.imresize`` for details. - is_rescale : boolean, default False - Set to True, if the input coordinates are rescaled to [0, 1], then return the original coordinates. + coords : list of list of 4 int/float + Coordinates [[x, y, w, h], [x, y, w, h], ...] + size interp and mode : args + See ``tl.prepro.imresize``. 
+ is_rescale : boolean + Set to True, if the input coordinates are rescaled to [0, 1], then return the original coordinates. Default is False. + + Returns + ------- + numpy.array + A processed image + list of list of 4 numbers + A list of new bounding boxes. Examples -------- @@ -1857,16 +2308,17 @@ def obj_box_imresize(im, coords=[], size=[100, 100], interp='bicubic', mode=None ... [[40, 80, 60, 60], [20, 40, 40, 40]] >>> _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30]], size=[40, 100], is_rescale=False) >>> print(coords) - ... [20, 20, 30, 15] + ... [[20, 20, 30, 15]] >>> _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30]], size=[60, 150], is_rescale=False) >>> print(coords) - ... [30, 30, 45, 22] + ... [[30, 30, 45, 22]] >>> im2, coords = obj_box_imresize(im, coords=[[0.2, 0.4, 0.3, 0.3]], size=[160, 200], is_rescale=True) >>> print(coords, im2.shape) - ... [0.2, 0.4, 0.3, 0.3] (160, 200, 3) + ... [[0.2, 0.4, 0.3, 0.3]] (160, 200, 3) + """ imh, imw = im.shape[0:2] - imh = imh * 1.0 # * 1.0 for python2 : force division to be float point + imh = imh * 1.0 # * 1.0 for python2 : force division to be float point imw = imw * 1.0 im = imresize(im, size=size, interp=interp, mode=mode) @@ -1875,70 +2327,83 @@ def obj_box_imresize(im, coords=[], size=[100, 100], interp='bicubic', mode=None for coord in coords: assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" # x' = x * (imw'/imw) - x = int(coord[0] * (size[1]/imw)) + x = int(coord[0] * (size[1] / imw)) # y' = y * (imh'/imh) - # print('>>', coord[1], size[0], imh) - y = int(coord[1] * (size[0]/imh)) + # logging.info('>>', coord[1], size[0], imh) + y = int(coord[1] * (size[0] / imh)) # w' = w * (imw'/imw) - w = int(coord[2] * (size[1]/imw)) + w = int(coord[2] * (size[1] / imw)) # h' = h * (imh'/imh) - h = int(coord[3] * (size[0]/imh)) + h = int(coord[3] * (size[0] / imh)) coords_new.append([x, y, w, h]) return im, coords_new else: return im, coords + # im = np.zeros([80, 100, 3]) # as an image with shape width=100, height=80 # _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30], [10, 20, 20, 20]], size=[160, 200], is_rescale=False) -# print(coords) +# logging.info(coords) # # ... [[40, 80, 60, 60], [20, 40, 40, 40]] # _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30]], size=[40, 100], is_rescale=False) -# print(coords) +# logging.info(coords) # # ... [20, 20, 30, 15] # _, coords = obj_box_imresize(im, coords=[[20, 40, 30, 30]], size=[60, 150], is_rescale=False) -# print(coords) +# logging.info(coords) # # ... [30, 30, 45, 22] # im2, coords = obj_box_imresize(im, coords=[[0.2, 0.4, 0.3, 0.3]], size=[160, 200], is_rescale=True) -# print(coords, im2.shape) +# logging.info(coords, im2.shape) # # ... [0.2, 0.4, 0.3, 0.3] (160, 200, 3) # exit() -def obj_box_crop(im, classes=[], coords=[], wrg=100, hrg=100, - is_rescale=False, is_center=False, is_random=False, - thresh_wh=0.02, thresh_wh2=12.): + +def obj_box_crop(im, classes=[], coords=[], wrg=100, hrg=100, is_rescale=False, is_center=False, is_random=False, thresh_wh=0.02, thresh_wh2=12.): """Randomly or centrally crop an image, and compute the new bounding box coordinates. Objects outside the cropped image will be removed. Parameters ----------- - im : numpy array + im : numpy.array An image with dimension of [row, col, channel] (default). - classes : list of class ID (int). - coords : list of list for coordinates [[x, y, w, h], [x, y, w, h], ...] - wrg, hrg, is_random : see ``tl.prepro.crop`` for details. 
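# [Editor's note] Box rescaling in ``obj_box_imresize`` above is plain proportional
# scaling, x' = x * (new_w / old_w) and likewise for y, w and h; this repeats the
# first docstring example as a runnable sketch (illustrative only).
import numpy as np
import tensorlayer as tl

im = np.zeros([80, 100, 3])  # height=80, width=100
im2, coords = tl.prepro.obj_box_imresize(im, coords=[[20, 40, 30, 30]], size=[160, 200], is_rescale=False)
# coords == [[40, 80, 60, 60]]: every value doubles because both sides doubled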
- is_rescale : boolean, default False - Set to True, if the input coordinates are rescaled to [0, 1]. + classes : list of int + Class IDs. + coords : list of list of 4 int/float + Coordinates [[x, y, w, h], [x, y, w, h], ...] + wrg hrg and is_random : args + See ``tl.prepro.crop``. + is_rescale : boolean + Set to True, if the input coordinates are rescaled to [0, 1]. Default is False. is_center : boolean, default False - Set to True, if the x and y of coordinates are the centroid. (i.e. darknet format) + Set to True, if the x and y of coordinates are the centroid (i.e. darknet format). Default is False. thresh_wh : float Threshold, remove the box if its ratio of width(height) to image size less than the threshold. thresh_wh2 : float Threshold, remove the box if its ratio of width to height or vice verse higher than the threshold. + + Returns + ------- + numpy.array + A processed image + list of int + A list of classes + list of list of 4 numbers + A list of new bounding boxes. + """ h, w = im.shape[0], im.shape[1] assert (h > hrg) and (w > wrg), "The size of cropping should smaller than the original image" if is_random: - h_offset = int(np.random.uniform(0, h-hrg) -1) - w_offset = int(np.random.uniform(0, w-wrg) -1) + h_offset = int(np.random.uniform(0, h - hrg) - 1) + w_offset = int(np.random.uniform(0, w - wrg) - 1) h_end = hrg + h_offset w_end = wrg + w_offset - im_new = im[h_offset: h_end ,w_offset: w_end] - else: # central crop - h_offset = int(np.floor((h - hrg)/2.)) - w_offset = int(np.floor((w - wrg)/2.)) + im_new = im[h_offset:h_end, w_offset:w_end] + else: # central crop + h_offset = int(np.floor((h - hrg) / 2.)) + w_offset = int(np.floor((w - wrg) / 2.)) h_end = h_offset + hrg w_end = w_offset + wrg - im_new = im[h_offset: h_end, w_offset: w_end] + im_new = im[h_offset:h_end, w_offset:w_end] # w # _____________________________ @@ -1951,9 +2416,10 @@ def obj_box_crop(im, classes=[], coords=[], wrg=100, hrg=100, # |___________________________| def _get_coord(coord): - """ Input pixel-unit [x, y, w, h] format, then make sure [x, y] it is the up-left coordinates, + """Input pixel-unit [x, y, w, h] format, then make sure [x, y] it is the up-left coordinates, before getting the new coordinates. Boxes outsides the cropped image will be removed. + """ if is_center: coord = obj_box_coord_centroid_to_upleft(coord) @@ -1975,7 +2441,7 @@ def _get_coord(coord): return None w = w + x x = 0 - elif x > im_new.shape[1]: # object outside the cropped image + elif x > im_new.shape[1]: # object outside the cropped image return None if y < 0: @@ -1983,21 +2449,21 @@ def _get_coord(coord): return None h = h + y y = 0 - elif y > im_new.shape[0]: # object outside the cropped image + elif y > im_new.shape[0]: # object outside the cropped image return None - if (x is not None) and (x + w > im_new.shape[1]): # box outside the cropped image + if (x is not None) and (x + w > im_new.shape[1]): # box outside the cropped image w = im_new.shape[1] - x - if (y is not None) and (y + h > im_new.shape[0]): # box outside the cropped image + if (y is not None) and (y + h > im_new.shape[0]): # box outside the cropped image h = im_new.shape[0] - y - if (w / (h+1.) > thresh_wh2) or (h / (w+1.) > thresh_wh2): # object shape strange: too narrow - # print('xx', w, h) + if (w / (h + 1.) > thresh_wh2) or (h / (w + 1.) > thresh_wh2): # object shape strange: too narrow + # logging.info('xx', w, h) return None - if (w / (im_new.shape[1]*1.) < thresh_wh) or (h / (im_new.shape[0]*1.) 
< thresh_wh): # object shape strange: too narrow - # print('yy', w, im_new.shape[1], h, im_new.shape[0]) + if (w / (im_new.shape[1] * 1.) < thresh_wh) or (h / (im_new.shape[0] * 1.) < thresh_wh): # object shape strange: too narrow + # logging.info('yy', w, im_new.shape[1], h, im_new.shape[0]) return None coord = [x, y, w, h] @@ -2014,7 +2480,7 @@ def _get_coord(coord): coord = coords[i] assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" if is_rescale: - """ for scaled coord, upscaled before process and scale back in the end. """ + # for scaled coord, upscaled before process and scale back in the end. coord = obj_box_coord_scale_to_pixelunit(coord, im.shape) coord = _get_coord(coord) if coord is not None: @@ -2028,56 +2494,80 @@ def _get_coord(coord): classes_new.append(classes[i]) return im_new, classes_new, coords_new -def obj_box_shift(im, classes=[], coords=[], wrg=0.1, hrg=0.1, - row_index=0, col_index=1, channel_index=2, - fill_mode='nearest', cval=0., order=1, - is_rescale=False, is_center=False, is_random=False, - thresh_wh=0.02, thresh_wh2=12.): - """ Shift an image randomly or non-randomly, and compute the new bounding box coordinates. + +def obj_box_shift(im, + classes=[], + coords=[], + wrg=0.1, + hrg=0.1, + row_index=0, + col_index=1, + channel_index=2, + fill_mode='nearest', + cval=0., + order=1, + is_rescale=False, + is_center=False, + is_random=False, + thresh_wh=0.02, + thresh_wh2=12.): + """Shift an image randomly or non-randomly, and compute the new bounding box coordinates. Objects outside the cropped image will be removed. Parameters ----------- - im : numpy array + im : numpy.array An image with dimension of [row, col, channel] (default). - classes : list of class ID (int). - coords : list of list for coordinates [[x, y, w, h], [x, y, w, h], ...] - wrg, hrg, row_index, col_index, channel_index, is_random, fill_mode, cval, order : see ``tl.prepro.shift``. - is_rescale : boolean, default False - Set to True, if the input coordinates are rescaled to [0, 1]. - is_center : boolean, default False - Set to True, if the x and y of coordinates are the centroid. (i.e. darknet format) + classes : list of int + Class IDs. + coords : list of list of 4 int/float + Coordinates [[x, y, w, h], [x, y, w, h], ...] + wrg, hrg row_index col_index channel_index is_random fill_mode cval and order : see ``tl.prepro.shift``. + is_rescale : boolean + Set to True, if the input coordinates are rescaled to [0, 1]. Default is False. + is_center : boolean + Set to True, if the x and y of coordinates are the centroid (i.e. darknet format). Default is False. thresh_wh : float Threshold, remove the box if its ratio of width(height) to image size less than the threshold. thresh_wh2 : float Threshold, remove the box if its ratio of width to height or vice verse higher than the threshold. + + + Returns + ------- + numpy.array + A processed image + list of int + A list of classes + list of list of 4 numbers + A list of new bounding boxes. + """ imh, imw = im.shape[row_index], im.shape[col_index] - assert (hrg < 1.0) and (hrg > 0.) and (wrg < 1.0) and (wrg > 0.) , "shift range should be (0, 1)" + assert (hrg < 1.0) and (hrg > 0.) 
and (wrg < 1.0) and (wrg > 0.), "shift range should be (0, 1)" if is_random: tx = np.random.uniform(-hrg, hrg) * imh ty = np.random.uniform(-wrg, wrg) * imw else: tx, ty = hrg * imh, wrg * imw - translation_matrix = np.array([[1, 0, tx], - [0, 1, ty], - [0, 0, 1]]) + translation_matrix = np.array([[1, 0, tx], [0, 1, ty], [0, 0, 1]]) transform_matrix = translation_matrix # no need to do offset im_new = apply_transform(im, transform_matrix, channel_index, fill_mode, cval, order) # modified from obj_box_crop def _get_coord(coord): - """ Input pixel-unit [x, y, w, h] format, then make sure [x, y] it is the up-left coordinates, + """Input pixel-unit [x, y, w, h] format, then make sure [x, y] it is the up-left coordinates, before getting the new coordinates. Boxes outsides the cropped image will be removed. + """ if is_center: coord = obj_box_coord_centroid_to_upleft(coord) ##======= pixel unit format and upleft, w, h ==========## - x = coord[0] - ty # only change this - y = coord[1] - tx # only change this + x = coord[0] - ty # only change this + y = coord[1] - tx # only change this w = coord[2] h = coord[3] @@ -2086,7 +2576,7 @@ def _get_coord(coord): return None w = w + x x = 0 - elif x > im_new.shape[1]: # object outside the cropped image + elif x > im_new.shape[1]: # object outside the cropped image return None if y < 0: @@ -2094,21 +2584,21 @@ def _get_coord(coord): return None h = h + y y = 0 - elif y > im_new.shape[0]: # object outside the cropped image + elif y > im_new.shape[0]: # object outside the cropped image return None - if (x is not None) and (x + w > im_new.shape[1]): # box outside the cropped image + if (x is not None) and (x + w > im_new.shape[1]): # box outside the cropped image w = im_new.shape[1] - x - if (y is not None) and (y + h > im_new.shape[0]): # box outside the cropped image + if (y is not None) and (y + h > im_new.shape[0]): # box outside the cropped image h = im_new.shape[0] - y - if (w / (h+1.) > thresh_wh2) or (h / (w+1.) > thresh_wh2): # object shape strange: too narrow - # print('xx', w, h) + if (w / (h + 1.) > thresh_wh2) or (h / (w + 1.) > thresh_wh2): # object shape strange: too narrow + # logging.info('xx', w, h) return None - if (w / (im_new.shape[1]*1.) < thresh_wh) or (h / (im_new.shape[0]*1.) < thresh_wh): # object shape strange: too narrow - # print('yy', w, im_new.shape[1], h, im_new.shape[0]) + if (w / (im_new.shape[1] * 1.) < thresh_wh) or (h / (im_new.shape[0] * 1.) < thresh_wh): # object shape strange: too narrow + # logging.info('yy', w, im_new.shape[1], h, im_new.shape[0]) return None coord = [x, y, w, h] @@ -2125,7 +2615,7 @@ def _get_coord(coord): coord = coords[i] assert len(coord) == 4, "coordinate should be 4 values : [x, y, w, h]" if is_rescale: - """ for scaled coord, upscaled before process and scale back in the end. """ + # for scaled coord, upscaled before process and scale back in the end. 
coord = obj_box_coord_scale_to_pixelunit(coord, im.shape) coord = _get_coord(coord) if coord is not None: @@ -2139,71 +2629,92 @@ def _get_coord(coord): classes_new.append(classes[i]) return im_new, classes_new, coords_new -def obj_box_zoom(im, classes=[], coords=[], zoom_range=(0.9, 1.1), - row_index=0, col_index=1, channel_index=2, fill_mode='nearest', cval=0., order=1, - is_rescale=False, is_center=False, is_random=False, - thresh_wh=0.02, thresh_wh2=12.): + +def obj_box_zoom(im, + classes=[], + coords=[], + zoom_range=(0.9, 1.1), + row_index=0, + col_index=1, + channel_index=2, + fill_mode='nearest', + cval=0., + order=1, + is_rescale=False, + is_center=False, + is_random=False, + thresh_wh=0.02, + thresh_wh2=12.): """Zoom in and out of a single image, randomly or non-randomly, and compute the new bounding box coordinates. Objects outside the cropped image will be removed. Parameters ----------- - im : numpy array + im : numpy.array An image with dimension of [row, col, channel] (default). - classes : list of class ID (int). - coords : list of list for coordinates [[x, y, w, h], [x, y, w, h], ...] - zoom_range, row_index, col_index, channel_index, is_random, fill_mode, cval, order : see ``tl.prepro.zoom``. - is_rescale : boolean, default False - Set to True, if the input coordinates are rescaled to [0, 1]. - is_center : boolean, default False - Set to True, if the x and y of coordinates are the centroid. (i.e. darknet format) + classes : list of int + Class IDs. + coords : list of list of 4 int/float + Coordinates [[x, y, w, h], [x, y, w, h], ...]. + zoom_range row_index col_index channel_index is_random fill_mode cval and order : see ``tl.prepro.zoom``. + is_rescale : boolean + Set to True, if the input coordinates are rescaled to [0, 1]. Default is False. + is_center : boolean + Set to True, if the x and y of coordinates are the centroid. (i.e. darknet format). Default is False. thresh_wh : float Threshold, remove the box if its ratio of width(height) to image size less than the threshold. thresh_wh2 : float Threshold, remove the box if its ratio of width to height or vice verse higher than the threshold. + + Returns + ------- + numpy.array + A processed image + list of int + A list of classes + list of list of 4 numbers + A list of new bounding boxes. + """ if len(zoom_range) != 2: - raise Exception('zoom_range should be a tuple or list of two floats. ' - 'Received arg: ', zoom_range) + raise Exception('zoom_range should be a tuple or list of two floats. ' 'Received arg: ', zoom_range) if is_random: if zoom_range[0] == 1 and zoom_range[1] == 1: zx, zy = 1, 1 - print(" random_zoom : not zoom in/out") + logging.info(" random_zoom : not zoom in/out") else: zx, zy = np.random.uniform(zoom_range[0], zoom_range[1], 2) else: zx, zy = zoom_range - # print(zx, zy) - zoom_matrix = np.array([[zx, 0, 0], - [0, zy, 0], - [0, 0, 1]]) + # logging.info(zx, zy) + zoom_matrix = np.array([[zx, 0, 0], [0, zy, 0], [0, 0, 1]]) h, w = im.shape[row_index], im.shape[col_index] transform_matrix = transform_matrix_offset_center(zoom_matrix, h, w) im_new = apply_transform(im, transform_matrix, channel_index, fill_mode, cval, order) - # modified from obj_box_crop def _get_coord(coord): - """ Input pixel-unit [x, y, w, h] format, then make sure [x, y] it is the up-left coordinates, + """Input pixel-unit [x, y, w, h] format, then make sure [x, y] it is the up-left coordinates, before getting the new coordinates. Boxes outsides the cropped image will be removed. 
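    [Editor's note] For one pixel-unit, up-left box [x, y, w, h] on a W x H image,
    the centred zoom below maps x' = (x - W/2) / zy + W/2 and w' = w / zy (y and h
    use zx). A hand check (editor's illustration) with zx = zy = 2 on a 100 x 100
    image:

    >>> W = H = 100.; zx = zy = 2.
    >>> x, y, w, h = 30., 40., 20., 20.
    >>> (x - W / 2) / zy + W / 2, (y - H / 2) / zx + H / 2, w / zy, h / zx
    ... (40.0, 45.0, 10.0, 10.0)  # the box moves towards the centre and halves in size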
+ """ if is_center: coord = obj_box_coord_centroid_to_upleft(coord) ##======= pixel unit format and upleft, w, h ==========## - x = (coord[0] - im.shape[1]/2) / zy + im.shape[1]/2 # only change this - y = (coord[1] - im.shape[0]/2) / zx + im.shape[0]/2 # only change this - w = coord[2] / zy # only change this - h = coord[3] / zx # only change thisS + x = (coord[0] - im.shape[1] / 2) / zy + im.shape[1] / 2 # only change this + y = (coord[1] - im.shape[0] / 2) / zx + im.shape[0] / 2 # only change this + w = coord[2] / zy # only change this + h = coord[3] / zx # only change thisS if x < 0: if x + w <= 0: return None w = w + x x = 0 - elif x > im_new.shape[1]: # object outside the cropped image + elif x > im_new.shape[1]: # object outside the cropped image return None if y < 0: @@ -2211,21 +2722,21 @@ def _get_coord(coord): return None h = h + y y = 0 - elif y > im_new.shape[0]: # object outside the cropped image + elif y > im_new.shape[0]: # object outside the cropped image return None - if (x is not None) and (x + w > im_new.shape[1]): # box outside the cropped image + if (x is not None) and (x + w > im_new.shape[1]): # box outside the cropped image w = im_new.shape[1] - x - if (y is not None) and (y + h > im_new.shape[0]): # box outside the cropped image + if (y is not None) and (y + h > im_new.shape[0]): # box outside the cropped image h = im_new.shape[0] - y - if (w / (h+1.) > thresh_wh2) or (h / (w+1.) > thresh_wh2): # object shape strange: too narrow - # print('xx', w, h) + if (w / (h + 1.) > thresh_wh2) or (h / (w + 1.) > thresh_wh2): # object shape strange: too narrow + # logging.info('xx', w, h) return None - if (w / (im_new.shape[1]*1.) < thresh_wh) or (h / (im_new.shape[0]*1.) < thresh_wh): # object shape strange: too narrow - # print('yy', w, im_new.shape[1], h, im_new.shape[0]) + if (w / (im_new.shape[1] * 1.) < thresh_wh) or (h / (im_new.shape[0] * 1.) < thresh_wh): # object shape strange: too narrow + # logging.info('yy', w, im_new.shape[1], h, im_new.shape[0]) return None coord = [x, y, w, h] @@ -2257,9 +2768,6 @@ def _get_coord(coord): return im_new, classes_new, coords_new - - - ## Sequence def pad_sequences(sequences, maxlen=None, dtype='int32', padding='post', truncating='pre', value=0.): """Pads each sequence to the same length: @@ -2272,17 +2780,23 @@ def pad_sequences(sequences, maxlen=None, dtype='int32', padding='post', truncat Parameters ---------- - sequences : list of lists where each element is a sequence - maxlen : int, maximum length - dtype : type to cast the resulting sequence. - padding : 'pre' or 'post', pad either before or after each sequence. - truncating : 'pre' or 'post', remove values from sequences larger than - maxlen either in the beginning or in the end of the sequence - value : float, value to pad the sequences to the desired value. + sequences : list of list of int + All sequences where each row is a sequence. + maxlen : int + Maximum length. + dtype : numpy.dtype or str + Data type to cast the resulting sequence. + padding : str + Either 'pre' or 'post', pad either before or after each sequence. + truncating : str + Either 'pre' or 'post', remove values from sequences larger than maxlen either in the beginning or in the end of the sequence + value : float + Value to pad the sequences to the desired value. 
Returns ---------- - x : numpy array with dimensions (number_of_sequences, maxlen) + x : numpy.array + With dimensions (number_of_sequences, maxlen) Examples ---------- @@ -2292,6 +2806,7 @@ def pad_sequences(sequences, maxlen=None, dtype='int32', padding='post', truncat ... [[1 1 1 1 1] ... [2 2 2 0 0] ... [3 3 0 0 0]] + """ lengths = [len(s) for s in sequences] @@ -2321,8 +2836,7 @@ def pad_sequences(sequences, maxlen=None, dtype='int32', padding='post', truncat # check `trunc` has expected shape trunc = np.asarray(trunc, dtype=dtype) if trunc.shape[1:] != sample_shape: - raise ValueError('Shape of sample %s of sequence at position %s is different from expected shape %s' % - (trunc.shape[1:], idx, sample_shape)) + raise ValueError('Shape of sample %s of sequence at position %s is different from expected shape %s' % (trunc.shape[1:], idx, sample_shape)) if padding == 'post': x[idx, :len(trunc)] = trunc @@ -2332,19 +2846,28 @@ def pad_sequences(sequences, maxlen=None, dtype='int32', padding='post', truncat raise ValueError('Padding type "%s" not understood' % padding) return x.tolist() + def remove_pad_sequences(sequences, pad_id=0): """Remove padding. Parameters ----------- - sequences : list of list. - pad_id : int. + sequences : list of list of int + All sequences where each row is a sequence. + pad_id : int + The pad ID. + + Returns + ---------- + list of list of int + The processed sequences. Examples ---------- >>> sequences = [[2,3,4,0,0], [5,1,2,3,4,0,0,0], [4,5,0,2,4,0,0,0]] >>> print(remove_pad_sequences(sequences, pad_id=0)) ... [[2, 3, 4], [5, 1, 2, 3, 4], [4, 5, 0, 2, 4]] + """ import copy sequences_out = copy.deepcopy(sequences) @@ -2355,23 +2878,31 @@ def remove_pad_sequences(sequences, pad_id=0): # break for j in range(1, len(sequences[i])): if sequences[i][-j] != pad_id: - sequences_out[i] = sequences_out[i][0:-j+1] + sequences_out[i] = sequences_out[i][0:-j + 1] break return sequences_out + def process_sequences(sequences, end_id=0, pad_val=0, is_shorten=True, remain_end_id=False): """Set all tokens(ids) after END token to the padding value, and then shorten (option) it to the maximum sequence length in this batch. Parameters ----------- - sequences : numpy array or list of list with token IDs. - e.g. [[4,3,5,3,2,2,2,2], [5,3,9,4,9,2,2,3]] - end_id : int, the special token for END. - pad_val : int, replace the end_id and the ids after end_id to this value. - is_shorten : boolean, default True. - Shorten the sequences. - remain_end_id : boolean, default False. - Keep an end_id in the end. + sequences : list of list of int + All sequences where each row is a sequence. + end_id : int + The special token for END. + pad_val : int + Replace the `end_id` and the IDs after `end_id` to this value. + is_shorten : boolean + Shorten the sequences. Default is True. + remain_end_id : boolean + Keep an `end_id` in the end. Default is False. + + Returns + ---------- + list of list of int + The processed sequences. Examples --------- @@ -2379,17 +2910,18 @@ def process_sequences(sequences, end_id=0, pad_val=0, is_shorten=True, remain_en ... [5, 3, 9, 4, 9, 2, 2, 3]] <-- end_id is 2 >>> sentences_ids = precess_sequences(sentences_ids, end_id=vocab.end_id, pad_val=0, is_shorten=True) ... 
[[4, 3, 5, 3, 0], [5, 3, 9, 4, 9]] + """ max_length = 0 for i_s, seq in enumerate(sequences): is_end = False for i_w, n in enumerate(seq): - if n == end_id and is_end == False: # 1st time to see end_id + if n == end_id and is_end == False: # 1st time to see end_id is_end = True if max_length < i_w: max_length = i_w if remain_end_id is False: - seq[i_w] = pad_val # set end_id to pad_val + seq[i_w] = pad_val # set end_id to pad_val elif is_end == True: seq[i_w] = pad_val @@ -2400,9 +2932,24 @@ def process_sequences(sequences, end_id=0, pad_val=0, is_shorten=True, remain_en sequences[i] = seq[:max_length] return sequences + def sequences_add_start_id(sequences, start_id=0, remove_last=False): """Add special start token(id) in the beginning of each sequence. + Parameters + ------------ + sequences : list of list of int + All sequences where each row is a sequence. + start_id : int + The start ID. + remove_last : boolean + Remove the last value of each sequences. Usually be used for removing the end ID. + + Returns + ---------- + list of list of int + The processed sequences. + Examples --------- >>> sentences_ids = [[4,3,5,3,2,2,2,2], [5,3,9,4,9,2,2,3]] @@ -2411,12 +2958,14 @@ def sequences_add_start_id(sequences, start_id=0, remove_last=False): >>> sentences_ids = sequences_add_start_id(sentences_ids, start_id=2, remove_last=True) ... [[2, 4, 3, 5, 3, 2, 2, 2], [2, 5, 3, 9, 4, 9, 2, 2]] - - For Seq2seq + For Seq2seq + >>> input = [a, b, c] >>> target = [x, y, z] >>> decode_seq = [start_id, a, b] <-- sequences_add_start_id(input, start_id, True) + """ - sequences_out = [[] for _ in range(len(sequences))]#[[]] * len(sequences) + sequences_out = [[] for _ in range(len(sequences))] #[[]] * len(sequences) for i in range(len(sequences)): if remove_last: sequences_out[i] = [start_id] + sequences[i][:-1] @@ -2424,21 +2973,30 @@ def sequences_add_start_id(sequences, start_id=0, remove_last=False): sequences_out[i] = [start_id] + sequences[i] return sequences_out + def sequences_add_end_id(sequences, end_id=888): """Add special end token(id) in the end of each sequence. Parameters ----------- - sequences : list of list. - end_id : int. + sequences : list of list of int + All sequences where each row is a sequence. + end_id : int + The end ID. + + Returns + ---------- + list of list of int + The processed sequences. Examples --------- >>> sequences = [[1,2,3],[4,5,6,7]] >>> print(sequences_add_end_id(sequences, end_id=999)) ... [[1, 2, 3, 999], [4, 5, 6, 999]] + """ - sequences_out = [[] for _ in range(len(sequences))]#[[]] * len(sequences) + sequences_out = [[] for _ in range(len(sequences))] #[[]] * len(sequences) for i in range(len(sequences)): sequences_out[i] = sequences[i] + [end_id] return sequences_out @@ -2449,15 +3007,24 @@ def sequences_add_end_id_after_pad(sequences, end_id=888, pad_id=0): Parameters ----------- - sequences : list of list. - end_id : int. - pad_id : int. + sequences : list of list of int + All sequences where each row is a sequence. + end_id : int + The end ID. + pad_id : int + The pad ID. + + Returns + ---------- + list of list of int + The processed sequences. Examples --------- >>> sequences = [[1,2,0,0], [1,2,3,0], [1,2,3,4]] >>> print(sequences_add_end_id_after_pad(sequences, end_id=99, pad_id=0)) ... 
[[1, 2, 99, 0], [1, 2, 3, 99], [1, 2, 3, 4]] + """ # sequences_out = [[] for _ in range(len(sequences))]#[[]] * len(sequences) import copy @@ -2481,9 +3048,22 @@ def sequences_add_end_id_after_pad(sequences, end_id=888, pad_id=0): # sequences_out[i] = sequences_out[i][:max_len+1] return sequences_out + def sequences_get_mask(sequences, pad_val=0): """Return mask for sequences. + Parameters + ----------- + sequences : list of list of int + All sequences where each row is a sequence. + pad_val : int + The pad value. + + Returns + ---------- + list of list of int + The mask. + Examples --------- >>> sentences_ids = [[4, 0, 5, 3, 0, 0], @@ -2491,6 +3071,7 @@ def sequences_get_mask(sequences, pad_val=0): >>> mask = sequences_get_mask(sentences_ids, pad_val=0) ... [[1 1 1 1 0 0] ... [1 1 1 1 1 0]] + """ mask = np.ones_like(sequences) for i, seq in enumerate(sequences): @@ -2498,175 +3079,5 @@ def sequences_get_mask(sequences, pad_val=0): if seq[i_w] == pad_val: mask[i, i_w] = 0 else: - break # <-- exit the for loop, prepcess next sequence + break # <-- exit the for loop, prepcess next sequence return mask - - -## Text -# see tensorlayer.nlp - - -## Tensor Opt -# def distorted_images(images=None, height=24, width=24): -# """Distort images for generating more training data. -# -# Features -# --------- -# They are cropped to height * width pixels randomly. -# -# They are approximately whitened to make the model insensitive to dynamic range. -# -# Randomly flip the image from left to right. -# -# Randomly distort the image brightness. -# -# Randomly distort the image contrast. -# -# Whiten (Normalize) the images. -# -# Parameters -# ---------- -# images : 4D Tensor -# The tensor or placeholder of images -# height : int -# The height for random crop. -# width : int -# The width for random crop. -# -# Returns -# ------- -# result : tuple of Tensor -# (Tensor for distorted images, Tensor for while loop index) -# -# Examples -# -------- -# >>> X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False) -# >>> sess = tf.InteractiveSession() -# >>> batch_size = 128 -# >>> x = tf.placeholder(tf.float32, shape=[batch_size, 32, 32, 3]) -# >>> distorted_images_op = tl.preprocess.distorted_images(images=x, height=24, width=24) -# >>> sess.run(tf.initialize_all_variables()) -# >>> feed_dict={x: X_train[0:batch_size,:,:,:]} -# >>> distorted_images, idx = sess.run(distorted_images_op, feed_dict=feed_dict) -# >>> tl.visualize.images2d(X_train[0:9,:,:,:], second=2, saveable=False, name='cifar10', dtype=np.uint8, fig_idx=20212) -# >>> tl.visualize.images2d(distorted_images[1:10,:,:,:], second=10, saveable=False, name='distorted_images', dtype=None, fig_idx=23012) -# -# Notes -# ------ -# - The first image in 'distorted_images' should be removed. -# -# References -# ----------- -# - `tensorflow.models.image.cifar10.cifar10_input `_ -# """ -# print("This function is deprecated, please use tf.map_fn instead, e.g:\n \ -# t_image = tf.map_fn(lambda img: tf.image.random_brightness(img, max_delta=32. 
/ 255.), t_image)\n \ -# t_image = tf.map_fn(lambda img: tf.image.random_contrast(img, lower=0.5, upper=1.5), t_image)\n \ -# t_image = tf.map_fn(lambda img: tf.image.random_saturation(img, lower=0.5, upper=1.5), t_image)\n \ -# t_image = tf.map_fn(lambda img: tf.image.random_hue(img, max_delta=0.032), t_image)") -# exit() -# # print(" [Warning] distorted_images will be deprecated due to speed, see TFRecord tutorial for more info...") -# try: -# batch_size = int(images._shape[0]) -# except: -# raise Exception('unknow batch_size of images') -# distorted_x = tf.Variable(tf.constant(0.1, shape=[1, height, width, 3])) -# i = tf.Variable(tf.constant(0)) -# -# c = lambda distorted_x, i: tf.less(i, batch_size) -# -# def body(distorted_x, i): -# # 1. Randomly crop a [height, width] section of the image. -# image = tf.random_crop(tf.gather(images, i), [height, width, 3]) -# # 2. Randomly flip the image horizontally. -# image = tf.image.random_flip_left_right(image) -# # 3. Randomly change brightness. -# image = tf.image.random_brightness(image, max_delta=63) -# # 4. Randomly change contrast. -# image = tf.image.random_contrast(image, lower=0.2, upper=1.8) -# # 5. Subtract off the mean and divide by the variance of the pixels. -# image = tf.image.per_image_whitening(image) -# # 6. Append the image to a batch. -# image = tf.expand_dims(image, 0) -# return tf.concat(0, [distorted_x, image]), tf.add(i, 1) -# -# result = tf.while_loop(cond=c, body=body, loop_vars=(distorted_x, i), parallel_iterations=16) -# return result -# -# -# def crop_central_whiten_images(images=None, height=24, width=24): -# """Crop the central of image, and normailize it for test data. -# -# They are cropped to central of height * width pixels. -# -# Whiten (Normalize) the images. -# -# Parameters -# ---------- -# images : 4D Tensor -# The tensor or placeholder of images -# height : int -# The height for central crop. -# width : int -# The width for central crop. -# -# Returns -# ------- -# result : tuple Tensor -# (Tensor for distorted images, Tensor for while loop index) -# -# Examples -# -------- -# >>> X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False) -# >>> sess = tf.InteractiveSession() -# >>> batch_size = 128 -# >>> x = tf.placeholder(tf.float32, shape=[batch_size, 32, 32, 3]) -# >>> central_images_op = tl.preprocess.crop_central_whiten_images(images=x, height=24, width=24) -# >>> sess.run(tf.initialize_all_variables()) -# >>> feed_dict={x: X_train[0:batch_size,:,:,:]} -# >>> central_images, idx = sess.run(central_images_op, feed_dict=feed_dict) -# >>> tl.visualize.images2d(X_train[0:9,:,:,:], second=2, saveable=False, name='cifar10', dtype=np.uint8, fig_idx=20212) -# >>> tl.visualize.images2d(central_images[1:10,:,:,:], second=10, saveable=False, name='central_images', dtype=None, fig_idx=23012) -# -# Notes -# ------ -# The first image in 'central_images' should be removed. -# -# Code References -# ---------------- -# - ``tensorflow.models.image.cifar10.cifar10_input`` -# """ -# print("This function is deprecated, please use tf.map_fn instead, e.g:\n \ -# t_image = tf.map_fn(lambda img: tf.image.random_brightness(img, max_delta=32. 
/ 255.), t_image)\n \ -# t_image = tf.map_fn(lambda img: tf.image.random_contrast(img, lower=0.5, upper=1.5), t_image)\n \ -# t_image = tf.map_fn(lambda img: tf.image.random_saturation(img, lower=0.5, upper=1.5), t_image)\n \ -# t_image = tf.map_fn(lambda img: tf.image.random_hue(img, max_delta=0.032), t_image)") -# exit() -# # print(" [Warning] crop_central_whiten_images will be deprecated due to speed, see TFRecord tutorial for more info...") -# try: -# batch_size = int(images._shape[0]) -# except: -# raise Exception('unknow batch_size of images') -# central_x = tf.Variable(tf.constant(0.1, shape=[1, height, width, 3])) -# i = tf.Variable(tf.constant(0)) -# -# c = lambda central_x, i: tf.less(i, batch_size) -# -# def body(central_x, i): -# # 1. Crop the central [height, width] of the image. -# image = tf.image.resize_image_with_crop_or_pad(tf.gather(images, i), height, width) -# # 2. Subtract off the mean and divide by the variance of the pixels. -# image = tf.image.per_image_whitening(image) -# # 5. Append the image to a batch. -# image = tf.expand_dims(image, 0) -# return tf.concat(0, [central_x, image]), tf.add(i, 1) -# -# result = tf.while_loop(cond=c, body=body, loop_vars=(central_x, i), parallel_iterations=16) -# return result -# -# -# - - - -# diff --git a/tensorlayer/rein.py b/tensorlayer/rein.py index f37561e4..5021361e 100644 --- a/tensorlayer/rein.py +++ b/tensorlayer/rein.py @@ -1,25 +1,30 @@ #! /usr/bin/python # -*- coding: utf-8 -*- - - -import tensorflow as tf import numpy as np +import tensorflow as tf from six.moves import xrange + def discount_episode_rewards(rewards=[], gamma=0.99, mode=0): - """ Take 1D float array of rewards and compute discounted rewards for an + """Take 1D float array of rewards and compute discounted rewards for an episode. When encount a non-zero value, consider as the end a of an episode. Parameters ---------- - rewards : numpy list - a list of rewards + rewards : list + List of rewards gamma : float - discounted factor + Discounted factor mode : int - if mode == 0, reset the discount process when encount a non-zero reward (Ping-pong game). - if mode == 1, would not reset the discount process. + Mode for computing the discount rewards. + - If mode == 0, reset the discount process when encount a non-zero reward (Ping-pong game). + - If mode == 1, would not reset the discount process. + + Returns + -------- + list of float + The discounted rewards. Examples ---------- @@ -33,6 +38,7 @@ def discount_episode_rewards(rewards=[], gamma=0.99, mode=0): >>> print(discount_rewards) ... [ 1.52110755 1.69011939 1.87791049 2.08656716 1.20729685 1.34144104 ... 1.49048996 1.65610003 0.72899997 0.81 0.89999998 1. ] + """ discounted_r = np.zeros_like(rewards, dtype=np.float32) running_add = 0 @@ -46,18 +52,22 @@ def discount_episode_rewards(rewards=[], gamma=0.99, mode=0): def cross_entropy_reward_loss(logits, actions, rewards, name=None): - """ Calculate the loss for Policy Gradient Network. + """Calculate the loss for Policy Gradient Network. Parameters ---------- logits : tensor - The network outputs without softmax. This function implements softmax - inside. - actions : tensor/ placeholder + The network outputs without softmax. This function implements softmax inside. + actions : tensor or placeholder The agent actions. - rewards : tensor/ placeholder + rewards : tensor or placeholder The rewards. + Returns + -------- + Tensor + The TensorFlow loss function. 
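    [Editor's note] Conceptually this loss is sum_t CE(action_t | logits_t) * R_t,
    where R_t is typically produced by ``discount_episode_rewards`` above. A plain
    NumPy sketch of that mode=0 discounting recurrence, with ``discount_sketch``
    an editor-defined helper for illustration only:

    >>> def discount_sketch(rewards, gamma=0.99):
    ...     out = np.zeros_like(rewards, dtype=np.float32)
    ...     running = 0.
    ...     for t in reversed(range(len(rewards))):
    ...         if rewards[t] != 0:
    ...             running = 0.  # episode boundary: restart the return
    ...         running = running * gamma + rewards[t]
    ...         out[t] = running
    ...     return out
    >>> discount_sketch(np.array([0., 0., 1.]))
    ... [0.9801  0.99  1.]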
+
    Examples
    ----------
    >>> states_batch_pl = tf.placeholder(tf.float32, shape=[None, D])
@@ -70,20 +80,21 @@ def cross_entropy_reward_loss(logits, actions, rewards, name=None):
    >>> discount_rewards_batch_pl = tf.placeholder(tf.float32, shape=[None])
    >>> loss = tl.rein.cross_entropy_reward_loss(probs, actions_batch_pl, discount_rewards_batch_pl)
    >>> train_op = tf.train.RMSPropOptimizer(learning_rate, decay_rate).minimize(loss)
-    """

-    try: # TF 1.0+
+    """
+    try:  # TF 1.0+
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=actions, logits=logits, name=name)
    except:
        cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits=logits, targets=actions)
        # cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(logits, actions)

-    try: ## TF1.0+
+    try:  ## TF1.0+
        loss = tf.reduce_sum(tf.multiply(cross_entropy, rewards))
-    except: ## TF0.12
-        loss = tf.reduce_sum(tf.mul(cross_entropy, rewards)) # element-wise mul
+    except:  ## TF0.12
+        loss = tf.reduce_sum(tf.mul(cross_entropy, rewards))  # element-wise mul
    return loss

+
 def log_weight(probs, weights, name='log_weight'):
    """Log weight.

@@ -92,22 +103,33 @@ def log_weight(probs, weights, name='log_weight'):
    probs : tensor
        If it is a network output, usually we should scale it to [0, 1] via softmax.
    weights : tensor
+        The weights.
+
+    Returns
+    --------
+    Tensor
+        The Tensor after applying the log weighted expression.
+
    """
    with tf.variable_scope(name):
        exp_v = tf.reduce_mean(tf.log(probs) * weights)
        return exp_v

-
 def choice_action_by_probs(probs=[0.5, 0.5], action_list=None):
    """Choose and return an action given the action probability distribution.

    Parameters
    ------------
-    probs : a list of float.
+    probs : list of float
        The probability distribution of all actions.
-    action_list : None or a list of action in integer, string or others.
-        If None, returns an integer range between 0 and len(probs)-1.
+    action_list : None or list of int or others
+        A list of actions as integers, strings or other types. If None, returns an integer in the range 0 to len(probs)-1.
+
+    Returns
+    --------
+    float, int or str
+        The chosen action.

    Examples
    ----------
@@ -125,6 +147,7 @@ def choice_action_by_probs(probs=[0.5, 0.5], action_list=None):
    ...      a
    ...      b
    ...      
b + """ if action_list is None: n_action = len(probs) diff --git a/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_ops.py b/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_ops.py index 5c46dc37..be971c33 100644 --- a/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_ops.py +++ b/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_ops.py @@ -1,12 +1,14 @@ +import os + import tensorflow as tf from tensorflow.python.framework import ops -import os module_path = os.path.realpath(__file__) module_dir = os.path.dirname(module_path) lib_path = os.path.join(module_dir, 'roi_pooling.so') roi_pooling_module = tf.load_op_library(lib_path) + def roi_pooling(input, rois, pool_height, pool_width): """ returns a tensorflow operation for computing the Region of Interest Pooling @@ -29,10 +31,8 @@ def _RoiPoolingGrad(op, *grads): orig_argmax_output = op.outputs[1] orig_output_grad = grads[0] - output_grad = roi_pooling_module.roi_pooling_grad(orig_inputs, orig_rois, orig_output, - orig_argmax_output, orig_output_grad, - pool_height=op.get_attr('pool_height'), - pool_width=op.get_attr('pool_width')) + output_grad = roi_pooling_module.roi_pooling_grad( + orig_inputs, orig_rois, orig_output, orig_argmax_output, orig_output_grad, pool_height=op.get_attr('pool_height'), pool_width=op.get_attr('pool_width')) return [output_grad, None] @@ -47,5 +47,4 @@ def _RoiPoolingShape(op): pool_width = op.get_attr('pool_width') #TODO: check the width/hegiht order - return [tf.TensorShape([n_rois, n_channels, pool_width, pool_height]), - tf.TensorShape(None)] + return [tf.TensorShape([n_rois, n_channels, pool_width, pool_height]), tf.TensorShape(None)] diff --git a/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_test.py b/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_test.py index c5f1b361..952e556a 100644 --- a/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_test.py +++ b/tensorlayer/third_party/roi_pooling/roi_pooling/roi_pooling_test.py @@ -1,5 +1,5 @@ -import tensorflow as tf import numpy as np +import tensorflow as tf from roi_pooling_ops import roi_pooling @@ -9,21 +9,10 @@ class RoiPoolingTest(tf.test.TestCase): def test_roi_pooling_grad(self): # TODO(maciek): corner cases - input_value = [[ - [[1], [2], [4], [4]], - [[3], [4], [1], [2]], - [[6], [2], [1], [7.0]], - [[1], [3], [2], [8]] - ]] + input_value = [[[[1], [2], [4], [4]], [[3], [4], [1], [2]], [[6], [2], [1], [7.0]], [[1], [3], [2], [8]]]] input_value = np.asarray(input_value, dtype='float32') - rois_value = [ - [0, 0, 0, 1, 1], - [0, 1, 1, 2, 2], - [0, 2, 2, 3, 3], - [0, 0, 0, 2, 2], - [0, 0, 0, 3, 3] - ] + rois_value = [[0, 0, 0, 1, 1], [0, 1, 1, 2, 2], [0, 2, 2, 3, 3], [0, 0, 0, 2, 2], [0, 0, 0, 3, 3]] rois_value = np.asarray(rois_value, dtype='int32') with tf.Session(''): @@ -33,11 +22,9 @@ def test_roi_pooling_grad(self): y = roi_pooling(input_const, rois_const, pool_height=2, pool_width=2) mean = tf.reduce_mean(y) - numerical_grad_error_1 = tf.test.compute_gradient_error( - [input_const], [input_value.shape], y, [5, 2, 2, 1]) + numerical_grad_error_1 = tf.test.compute_gradient_error([input_const], [input_value.shape], y, [5, 2, 2, 1]) - numerical_grad_error_2 = tf.test.compute_gradient_error( - [input_const], [input_value.shape], mean, []) + numerical_grad_error_2 = tf.test.compute_gradient_error([input_const], [input_value.shape], mean, []) self.assertLess(numerical_grad_error_1, 1e-4) self.assertLess(numerical_grad_error_2, 1e-4) @@ -87,7 +74,7 @@ def 
test_very_big_output(self): than the number of available GPU threads """ - pooled_w, pooled_h = 7,7 + pooled_w, pooled_h = 7, 7 input_w, input_h = 72, 240 n_channels = 512 n_batches = 2 @@ -106,5 +93,6 @@ def test_very_big_output(self): self.assertTrue(np.all(y_output == 1)) + if __name__ == '__main__': tf.test.main() diff --git a/tensorlayer/third_party/roi_pooling/roi_pooling_example.py b/tensorlayer/third_party/roi_pooling/roi_pooling_example.py index 7d9b7b63..050f6deb 100644 --- a/tensorlayer/third_party/roi_pooling/roi_pooling_example.py +++ b/tensorlayer/third_party/roi_pooling/roi_pooling_example.py @@ -1,28 +1,16 @@ from __future__ import print_function -import tensorflow as tf import numpy as np - +import tensorflow as tf from roi_pooling.roi_pooling_ops import roi_pooling -# input feature map going into the RoI pooling -input_value = [[ - [[1], [2], [4], [4]], - [[3], [4], [1], [2]], - [[6], [2], [1], [7.0]], - [[1], [3], [2], [8]] -]] +# input feature map going into the RoI pooling +input_value = [[[[1], [2], [4], [4]], [[3], [4], [1], [2]], [[6], [2], [1], [7.0]], [[1], [3], [2], [8]]]] input_value = np.asarray(input_value, dtype='float32') # Regions of interest as lists of: # feature map index, upper left, bottom right coordinates -rois_value = [ - [0, 0, 0, 1, 1], - [0, 1, 1, 2, 2], - [0, 2, 2, 3, 3], - [0, 0, 0, 2, 2], - [0, 0, 0, 3, 3] -] +rois_value = [[0, 0, 0, 1, 1], [0, 1, 1, 2, 2], [0, 2, 2, 3, 3], [0, 0, 0, 2, 2], [0, 0, 0, 3, 3]] rois_value = np.asarray(rois_value, dtype='int32') # the pool_height and width are parameters of the ROI layer diff --git a/tensorlayer/third_party/roi_pooling/setup.py b/tensorlayer/third_party/roi_pooling/setup.py index de392a9d..b262072a 100644 --- a/tensorlayer/third_party/roi_pooling/setup.py +++ b/tensorlayer/third_party/roi_pooling/setup.py @@ -1,32 +1,30 @@ #!/usr/bin/env python from __future__ import print_function -from distutils.core import setup -from distutils.command.install import install as DistutilsInstall -import sys + import subprocess +import sys +from distutils.command.install import install as DistutilsInstall +from distutils.core import setup try: import tensorflow except ImportError: print("Please install tensorflow 0.12.0 or later") sys.exit() - + class MyInstall(DistutilsInstall): def run(self): subprocess.call(['make', '-C', 'roi_pooling', 'build']) DistutilsInstall.run(self) -setup(name='roi-pooling', - version='1.0', - description='ROI pooling as a custom TensorFlow operation', - author='deepsense.io', - packages=['roi_pooling'], - package_data={'roi_pooling': ['roi_pooling.so']}, - cmdclass={'install': MyInstall} -) - - - +setup( + name='roi-pooling', + version='1.0', + description='ROI pooling as a custom TensorFlow operation', + author='deepsense.io', + packages=['roi_pooling'], + package_data={'roi_pooling': ['roi_pooling.so']}, + cmdclass={'install': MyInstall}) diff --git a/tensorlayer/third_party/roi_pooling/test_roi_layer.py b/tensorlayer/third_party/roi_pooling/test_roi_layer.py index 5ca6a12a..d0e27449 100644 --- a/tensorlayer/third_party/roi_pooling/test_roi_layer.py +++ b/tensorlayer/third_party/roi_pooling/test_roi_layer.py @@ -1,28 +1,16 @@ from tensorlayer.layers import * +from tensorlayer.third_party.roi_pooling.roi_pooling.roi_pooling_ops import \ + roi_pooling -from tensorlayer.third_party.roi_pooling.roi_pooling.roi_pooling_ops import roi_pooling # from roi_pooling.roi_pooling_ops import roi_pooling - - # input feature map going into the RoI pooling -input_value = [[ - [[1], [2], [4], [4]], - 
[[3], [4], [1], [2]], - [[6], [2], [1], [7.0]], - [[1], [3], [2], [8]] -]] +input_value = [[[[1], [2], [4], [4]], [[3], [4], [1], [2]], [[6], [2], [1], [7.0]], [[1], [3], [2], [8]]]] input_value = np.asarray(input_value, dtype='float32') # Regions of interest as lists of: # feature map index, upper left, bottom right coordinates -rois_value = [ - [0, 0, 0, 1, 1], - [0, 1, 1, 2, 2], - [0, 2, 2, 3, 3], - [0, 0, 0, 2, 2], - [0, 0, 0, 3, 3] -] +rois_value = [[0, 0, 0, 1, 1], [0, 1, 1, 2, 2], [0, 2, 2, 3, 3], [0, 0, 0, 2, 2], [0, 0, 0, 3, 3]] rois_value = np.asarray(rois_value, dtype='int32') # the pool_height and width are parameters of the ROI layer diff --git a/tensorlayer/utils.py b/tensorlayer/utils.py index 12a3f4bb..90a47bef 100644 --- a/tensorlayer/utils.py +++ b/tensorlayer/utils.py @@ -1,64 +1,91 @@ -#! /usr/bin/python # -*- coding: utf-8 -*- +import os +import random +import subprocess +import sys +import time +from sys import exit as _exit +from sys import platform as _platform + +import numpy as np import tensorflow as tf import tensorlayer as tl + +from . import _logging as logging from . import iterate -import numpy as np -import time -import math -import random -def fit(sess, network, train_op, cost, X_train, y_train, x, y_, acc=None, batch_size=100, - n_epoch=100, print_freq=5, X_val=None, y_val=None, eval_train=True, - tensorboard=False, tensorboard_epoch_freq=5, tensorboard_weight_histograms=True, tensorboard_graph_vis=True): - """Traing a given non time-series network by the given cost function, training data, batch_size, n_epoch etc. +def fit(sess, + network, + train_op, + cost, + X_train, + y_train, + x, + y_, + acc=None, + batch_size=100, + n_epoch=100, + print_freq=5, + X_val=None, + y_val=None, + eval_train=True, + tensorboard=False, + tensorboard_epoch_freq=5, + tensorboard_weight_histograms=True, + tensorboard_graph_vis=True): + """Training a given non time-series network by the given cost function, training data, batch_size, n_epoch etc. + + - MNIST example click `here `_. + - In order to control the training details, the authors HIGHLY recommend ``tl.iterate``; see two MNIST examples `1 `_, `2 `_. Parameters ---------- - sess : TensorFlow session - sess = tf.InteractiveSession() - network : a TensorLayer layer - the network will be trained - train_op : a TensorFlow optimizer - like tf.train.AdamOptimizer - X_train : numpy array - the input of training data - y_train : numpy array - the target of training data + sess : Session + TensorFlow Session. + network : TensorLayer layer + The network to be trained. + train_op : TensorFlow optimizer + The optimizer for training e.g. tf.train.AdamOptimizer. + X_train : numpy.array + The input of training data. + y_train : numpy.array + The target of training data. x : placeholder - for inputs + For inputs. y_ : placeholder - for targets - acc : the TensorFlow expression of accuracy (or other metric) or None - if None, would not display the metric + For targets. + acc : TensorFlow expression or None + Metric for accuracy or others. If None, would not print the information. batch_size : int - batch size for training and evaluating + The batch size for training and evaluating. n_epoch : int - the number of training epochs + The number of training epochs. print_freq : int - display the training information every ``print_freq`` epochs - X_val : numpy array or None - the input of validation data - y_val : numpy array or None - the target of validation data + Print the training information every ``print_freq`` epochs. 
+ X_val : numpy.array or None + The input of validation data. If None, would not perform validation. + y_val : numpy.array or None + The target of validation data. If None, would not perform validation. eval_train : boolean - if X_val and y_val are not None, it refects whether to evaluate the training data + Whether to evaluate the model during training. + If X_val and y_val are not None, it reflects whether to evaluate the model on training data. tensorboard : boolean - if True summary data will be stored to the log/ direcory for visualization with tensorboard. + If True, summary data will be stored to the log/ directory for visualization with tensorboard. See also detailed tensorboard_X settings for specific configurations of features. (default False) - Also runs tl.layers.initialize_global_variables(sess) internally in fit() to setup the summary nodes, see Note: + Also runs `tl.layers.initialize_global_variables(sess)` internally in fit() to setup the summary nodes. tensorboard_epoch_freq : int - how many epochs between storing tensorboard checkpoint for visualization to log/ directory (default 5) + How many epochs between storing tensorboard checkpoint for visualization to log/ directory (default 5). tensorboard_weight_histograms : boolean - if True updates tensorboard data in the logs/ directory for visulaization - of the weight histograms every tensorboard_epoch_freq epoch (default True) + If True updates tensorboard data in the logs/ directory for visualization + of the weight histograms every tensorboard_epoch_freq epoch (default True). tensorboard_graph_vis : boolean - if True stores the graph in the tensorboard summaries saved to log/ (default True) + If True stores the graph in the tensorboard summaries saved to log/ (default True). Examples -------- - >>> see tutorial_mnist_simple.py + See `tutorial_mnist_simple.py `_ + >>> tl.utils.fit(sess, network, train_op, cost, X_train, y_train, x, y_, ... acc=acc, batch_size=500, n_epoch=200, print_freq=5, ... X_val=X_val, y_val=y_val, eval_train=False) @@ -69,31 +96,32 @@ def fit(sess, network, train_op, cost, X_train, y_train, x, y_, acc=None, batch_ Notes -------- - If tensorboard=True, the global_variables_initializer will be run inside the fit function - in order to initalize the automatically generated summary nodes used for tensorboard visualization, - thus tf.global_variables_initializer().run() before the fit() call will be undefined. + If tensorboard=True, the `global_variables_initializer` will be run inside the fit function + in order to initialize the automatically generated summary nodes used for tensorboard visualization, + thus `tf.global_variables_initializer().run()` before the `fit()` call will be undefined. 
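+ For instance, a minimal end-to-end sketch under the same assumptions as the Examples above (shapes and hyper-parameters are illustrative; it assumes a classifier `network` built on the placeholders `x` and `y_`): + >>> cost = tl.cost.cross_entropy(network.outputs, y_, name='cost') + >>> train_op = tf.train.AdamOptimizer(0.0001).minimize(cost) + >>> tl.utils.fit(sess, network, train_op, cost, X_train, y_train, x, y_, + ... acc=None, batch_size=128, n_epoch=10, X_val=X_val, y_val=y_val)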
+ """ assert X_train.shape[0] >= batch_size, "Number of training examples should be bigger than the batch size" - if(tensorboard): - print("Setting up tensorboard ...") + if (tensorboard): + logging.info("Setting up tensorboard ...") #Set up tensorboard summaries and saver tl.files.exists_or_mkdir('logs/') #Only write summaries for more recent TensorFlow versions if hasattr(tf, 'summary') and hasattr(tf.summary, 'FileWriter'): if tensorboard_graph_vis: - train_writer = tf.summary.FileWriter('logs/train',sess.graph) - val_writer = tf.summary.FileWriter('logs/validation',sess.graph) + train_writer = tf.summary.FileWriter('logs/train', sess.graph) + val_writer = tf.summary.FileWriter('logs/validation', sess.graph) else: train_writer = tf.summary.FileWriter('logs/train') val_writer = tf.summary.FileWriter('logs/validation') #Set up summary nodes - if(tensorboard_weight_histograms): + if (tensorboard_weight_histograms): for param in network.all_params: if hasattr(tf, 'summary') and hasattr(tf.summary, 'histogram'): - print('Param name ', param.name) + logging.info('Param name %s' % param.name) tf.summary.histogram(param.name, param) if hasattr(tf, 'summary') and hasattr(tf.summary, 'histogram'): @@ -103,51 +131,48 @@ def fit(sess, network, train_op, cost, X_train, y_train, x, y_, acc=None, batch_ #Initalize all variables and summaries tl.layers.initialize_global_variables(sess) - print("Finished! use $tensorboard --logdir=logs/ to start server") + logging.info("Finished! use $tensorboard --logdir=logs/ to start server") - print("Start training the network ...") + logging.info("Start training the network ...") start_time_begin = time.time() tensorboard_train_index, tensorboard_val_index = 0, 0 for epoch in range(n_epoch): start_time = time.time() - loss_ep = 0; n_step = 0 - for X_train_a, y_train_a in iterate.minibatches(X_train, y_train, - batch_size, shuffle=True): + loss_ep = 0 + n_step = 0 + for X_train_a, y_train_a in iterate.minibatches(X_train, y_train, batch_size, shuffle=True): feed_dict = {x: X_train_a, y_: y_train_a} - feed_dict.update( network.all_drop ) # enable noise layers + feed_dict.update(network.all_drop) # enable noise layers loss, _ = sess.run([cost, train_op], feed_dict=feed_dict) loss_ep += loss n_step += 1 - loss_ep = loss_ep/ n_step + loss_ep = loss_ep / n_step if tensorboard and hasattr(tf, 'summary'): - if epoch+1 == 1 or (epoch+1) % tensorboard_epoch_freq == 0: - for X_train_a, y_train_a in iterate.minibatches( - X_train, y_train, batch_size, shuffle=True): - dp_dict = dict_to_one( network.all_drop ) # disable noise layers + if epoch + 1 == 1 or (epoch + 1) % tensorboard_epoch_freq == 0: + for X_train_a, y_train_a in iterate.minibatches(X_train, y_train, batch_size, shuffle=True): + dp_dict = dict_to_one(network.all_drop) # disable noise layers feed_dict = {x: X_train_a, y_: y_train_a} feed_dict.update(dp_dict) result = sess.run(merged, feed_dict=feed_dict) train_writer.add_summary(result, tensorboard_train_index) tensorboard_train_index += 1 if (X_val is not None) and (y_val is not None): - for X_val_a, y_val_a in iterate.minibatches( - X_val, y_val, batch_size, shuffle=True): - dp_dict = dict_to_one( network.all_drop ) # disable noise layers - feed_dict = {x: X_val_a, y_: y_val_a} - feed_dict.update(dp_dict) - result = sess.run(merged, feed_dict=feed_dict) - val_writer.add_summary(result, tensorboard_val_index) - tensorboard_val_index += 1 + for X_val_a, y_val_a in iterate.minibatches(X_val, y_val, batch_size, shuffle=True): + dp_dict = dict_to_one(network.all_drop) 
# disable noise layers + feed_dict = {x: X_val_a, y_: y_val_a} + feed_dict.update(dp_dict) + result = sess.run(merged, feed_dict=feed_dict) + val_writer.add_summary(result, tensorboard_val_index) + tensorboard_val_index += 1 if epoch + 1 == 1 or (epoch + 1) % print_freq == 0: if (X_val is not None) and (y_val is not None): - print("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time)) + logging.info("Epoch %d of %d took %fs" % (epoch + 1, n_epoch, time.time() - start_time)) if eval_train is True: train_loss, train_acc, n_batch = 0, 0, 0 - for X_train_a, y_train_a in iterate.minibatches( - X_train, y_train, batch_size, shuffle=True): - dp_dict = dict_to_one( network.all_drop ) # disable noise layers + for X_train_a, y_train_a in iterate.minibatches(X_train, y_train, batch_size, shuffle=True): + dp_dict = dict_to_one(network.all_drop) # disable noise layers feed_dict = {x: X_train_a, y_: y_train_a} feed_dict.update(dp_dict) if acc is not None: @@ -155,14 +180,14 @@ def fit(sess, network, train_op, cost, X_train, y_train, x, y_, acc=None, batch_ train_acc += ac else: err = sess.run(cost, feed_dict=feed_dict) - train_loss += err; n_batch += 1 - print(" train loss: %f" % (train_loss/ n_batch)) + train_loss += err + n_batch += 1 + logging.info(" train loss: %f" % (train_loss / n_batch)) if acc is not None: - print(" train acc: %f" % (train_acc/ n_batch)) + logging.info(" train acc: %f" % (train_acc / n_batch)) val_loss, val_acc, n_batch = 0, 0, 0 - for X_val_a, y_val_a in iterate.minibatches( - X_val, y_val, batch_size, shuffle=True): - dp_dict = dict_to_one( network.all_drop ) # disable noise layers + for X_val_a, y_val_a in iterate.minibatches(X_val, y_val, batch_size, shuffle=True): + dp_dict = dict_to_one(network.all_drop) # disable noise layers feed_dict = {x: X_val_a, y_: y_val_a} feed_dict.update(dp_dict) if acc is not None: @@ -170,13 +195,15 @@ def fit(sess, network, train_op, cost, X_train, y_train, x, y_, acc=None, batch_ val_acc += ac else: err = sess.run(cost, feed_dict=feed_dict) - val_loss += err; n_batch += 1 - print(" val loss: %f" % (val_loss/ n_batch)) + val_loss += err + n_batch += 1 + logging.info(" val loss: %f" % (val_loss / n_batch)) if acc is not None: - print(" val acc: %f" % (val_acc/ n_batch)) + logging.info(" val acc: %f" % (val_acc / n_batch)) else: - print("Epoch %d of %d took %fs, loss %f" % (epoch + 1, n_epoch, time.time() - start_time, loss_ep)) - print("Total training time: %fs" % (time.time() - start_time_begin)) + logging.info("Epoch %d of %d took %fs, loss %f" % (epoch + 1, n_epoch, time.time() - start_time, loss_ep)) + logging.info("Total training time: %fs" % (time.time() - start_time_begin)) + def test(sess, network, acc, X_test, y_test, x, y_, batch_size, cost=None): """ @@ -184,46 +211,48 @@ def test(sess, network, acc, X_test, y_test, x, y_, batch_size, cost=None): Parameters ---------- - sess : TensorFlow session - sess = tf.InteractiveSession() - network : a TensorLayer layer - the network will be trained - acc : the TensorFlow expression of accuracy (or other metric) or None - if None, would not display the metric - X_test : numpy array - the input of test data + sess : Session + TensorFlow session. + network : TensorLayer layer + The network. + acc : TensorFlow expression or None + Metric for accuracy or others. + - If None, would not print the information. + X_test : numpy.array + The input of testing data. y_test : numpy array - the target of test data + The target of testing data x : placeholder - for inputs + For inputs. 
y_ : placeholder - for targets + For targets. batch_size : int or None - batch size for testing, when dataset is large, we should use minibatche for testing. - when dataset is small, we can set it to None. - cost : the TensorFlow expression of cost or None - if None, would not display the cost + The batch size for testing; when the dataset is large, we should use minibatches for testing; + if the dataset is small, we can set it to None. + cost : TensorFlow expression or None + Metric for cost or others. If None, would not print the information. Examples -------- - >>> see tutorial_mnist_simple.py + See `tutorial_mnist_simple.py `_ + >>> tl.utils.test(sess, network, acc, X_test, y_test, x, y_, batch_size=None, cost=cost) + """ - print('Start testing the network ...') + logging.info('Start testing the network ...') if batch_size is None: - dp_dict = dict_to_one( network.all_drop ) + dp_dict = dict_to_one(network.all_drop) feed_dict = {x: X_test, y_: y_test} feed_dict.update(dp_dict) if cost is not None: - print(" test loss: %f" % sess.run(cost, feed_dict=feed_dict)) - print(" test acc: %f" % sess.run(acc, feed_dict=feed_dict)) - # print(" test acc: %f" % np.mean(y_test == sess.run(y_op, - # feed_dict=feed_dict))) + logging.info(" test loss: %f" % sess.run(cost, feed_dict=feed_dict)) + logging.info(" test acc: %f" % sess.run(acc, feed_dict=feed_dict)) + # logging.info(" test acc: %f" % np.mean(y_test == sess.run(y_op, + # feed_dict=feed_dict))) else: test_loss, test_acc, n_batch = 0, 0, 0 - for X_test_a, y_test_a in iterate.minibatches( - X_test, y_test, batch_size, shuffle=True): - dp_dict = dict_to_one( network.all_drop ) # disable noise layers + for X_test_a, y_test_a in iterate.minibatches(X_test, y_test, batch_size, shuffle=True): + dp_dict = dict_to_one(network.all_drop) # disable noise layers feed_dict = {x: X_test_a, y_: y_test_a} feed_dict.update(dp_dict) if cost is not None: @@ -231,10 +260,12 @@ def test(sess, network, acc, X_test, y_test, x, y_, batch_size, cost=None): test_loss += err else: ac = sess.run(acc, feed_dict=feed_dict) - test_acc += ac; n_batch += 1 + test_acc += ac + n_batch += 1 if cost is not None: - print(" test loss: %f" % (test_loss/ n_batch)) - print(" test acc: %f" % (test_acc/ n_batch)) + logging.info(" test loss: %f" % (test_loss / n_batch)) + logging.info(" test acc: %f" % (test_acc / n_batch)) + def predict(sess, network, X, x, y_op, batch_size=None): """ @@ -242,58 +273,67 @@ def predict(sess, network, X, x, y_op, batch_size=None): Parameters ---------- - sess : TensorFlow session - sess = tf.InteractiveSession() - network : a TensorLayer layer - the network will be trained - X : numpy array - the input + sess : Session + TensorFlow Session. + network : TensorLayer layer + The network. + X : numpy.array + The inputs. x : placeholder - for inputs + For inputs. y_op : tensor - the argmax expression of softmax outputs + The argmax expression of softmax outputs. batch_size : int or None - batch size for prediction, when dataset is large, we should use minibatche for prediction. - when dataset is small, we can set it to None. + The batch size for prediction; when the dataset is large, we should use minibatches for prediction; + if the dataset is small, we can set it to None. 
Examples -------- - >>> see tutorial_mnist_simple.py + See `tutorial_mnist_simple.py `_ + >>> y = network.outputs >>> y_op = tf.argmax(tf.nn.softmax(y), 1) >>> print(tl.utils.predict(sess, network, X_test, x, y_op)) + """ if batch_size is None: - dp_dict = dict_to_one( network.all_drop ) # disable noise layers - feed_dict = {x: X,} + dp_dict = dict_to_one(network.all_drop) # disable noise layers + feed_dict = { + x: X, + } feed_dict.update(dp_dict) return sess.run(y_op, feed_dict=feed_dict) else: result = None - for X_a, _ in iterate.minibatches( - X, X, batch_size, shuffle=False): - dp_dict = dict_to_one( network.all_drop ) - feed_dict = {x: X_a, } + for X_a, _ in iterate.minibatches(X, X, batch_size, shuffle=False): + dp_dict = dict_to_one(network.all_drop) + feed_dict = { + x: X_a, + } feed_dict.update(dp_dict) result_a = sess.run(y_op, feed_dict=feed_dict) if result is None: result = result_a else: - result = np.vstack((result, result_a)) + result = np.vstack((result, result_a)) # TODO: https://github.com/tensorlayer/tensorlayer/issues/288 if result is None: if len(X) % batch_size != 0: dp_dict = dict_to_one(network.all_drop) - feed_dict = {x: X[-(len(X) % batch_size):, :], } + feed_dict = { + x: X[-(len(X) % batch_size):, :], + } feed_dict.update(dp_dict) result_a = sess.run(y_op, feed_dict=feed_dict) result = result_a else: if len(X) != len(result) and len(X) % batch_size != 0: dp_dict = dict_to_one(network.all_drop) - feed_dict = {x: X[-(len(X) % batch_size):, :], } + feed_dict = { + x: X[-(len(X) % batch_size):, :], + } feed_dict.update(dp_dict) result_a = sess.run(y_op, feed_dict=feed_dict) - result = np.vstack((result, result_a)) + result = np.vstack((result, result_a)) # TODO: https://github.com/tensorlayer/tensorlayer/issues/288 return result @@ -306,28 +346,30 @@ def evaluation(y_test=None, y_predict=None, n_classes=None): Parameters ---------- - y_test : numpy.array or list - target results - y_predict : numpy.array or list - predicted results + y_test : list + The target results + y_predict : list + The predicted results n_classes : int - number of classes + The number of classes Examples -------- - >>> c_mat, f1, acc, f1_macro = evaluation(y_test, y_predict, n_classes) + >>> c_mat, f1, acc, f1_macro = tl.utils.evaluation(y_test, y_predict, n_classes) + """ from sklearn.metrics import confusion_matrix, f1_score, accuracy_score - c_mat = confusion_matrix(y_test, y_predict, labels = [x for x in range(n_classes)]) - f1 = f1_score(y_test, y_predict, average = None, labels = [x for x in range(n_classes)]) + c_mat = confusion_matrix(y_test, y_predict, labels=[x for x in range(n_classes)]) + f1 = f1_score(y_test, y_predict, average=None, labels=[x for x in range(n_classes)]) f1_macro = f1_score(y_test, y_predict, average='macro') - acc = accuracy_score(y_test, y_predict) - print('confusion matrix: \n',c_mat) - print('f1-score:',f1) - print('f1-score(macro):',f1_macro) # same output with > f1_score(y_true, y_pred, average='macro') - print('accuracy-score:', acc) + acc = accuracy_score(y_test, y_predict) + logging.info('confusion matrix: \n%s' % c_mat) + logging.info('f1-score : %s' % f1) + logging.info('f1-score(macro) : %f' % f1_macro) # same output with > f1_score(y_true, y_pred, average='macro') + logging.info('accuracy-score : %f' % acc) return c_mat, f1, acc, f1_macro + def dict_to_one(dp_dict={}): """ Input a dictionary, return a dictionary that all items are set to one, @@ -336,17 +378,19 @@ def dict_to_one(dp_dict={}): Parameters ---------- dp_dict : dictionary - keeping 
probabilities + The dictionary contains key and number, e.g. keeping probabilities. Examples -------- >>> dp_dict = dict_to_one( network.all_drop ) >>> dp_dict = dict_to_one( network.all_drop ) >>> feed_dict.update(dp_dict) + """ return {x: 1 for x in dp_dict} -def flatten_list(list_of_list=[[],[]]): + +def flatten_list(list_of_list=[[], []]): """ Input a list of list, return a list that all items are in a list. @@ -358,57 +402,62 @@ def flatten_list(list_of_list=[[],[]]): -------- >>> tl.utils.flatten_list([[1, 2, 3],[4, 5],[6]]) ... [1, 2, 3, 4, 5, 6] + """ return sum(list_of_list, []) + def class_balancing_oversample(X_train=None, y_train=None, printable=True): """Input the features and labels, return the features and labels after oversampling. Parameters ---------- X_train : numpy.array - Features, each row is an example + The inputs. y_train : numpy.array - Labels + The targets. Examples -------- - - One X + One X + >>> X_train, y_train = class_balancing_oversample(X_train, y_train, printable=True) - - Two X + Two X + >>> X, y = tl.utils.class_balancing_oversample(X_train=np.hstack((X1, X2)), y_train=y, printable=False) >>> X1 = X[:, 0:5] >>> X2 = X[:, 5:] + """ # ======== Classes balancing if printable: - print("Classes balancing for training examples...") + logging.info("Classes balancing for training examples...") from collections import Counter c = Counter(y_train) if printable: - print('the occurrence number of each stage: %s' % c.most_common()) - print('the least stage is Label %s have %s instances' % c.most_common()[-1]) - print('the most stage is Label %s have %s instances' % c.most_common(1)[0]) + logging.info('the occurrence number of each stage: %s' % c.most_common()) + logging.info('the least stage is Label %s have %s instances' % c.most_common()[-1]) + logging.info('the most stage is Label %s have %s instances' % c.most_common(1)[0]) most_num = c.most_common(1)[0][1] if printable: - print('most num is %d, all classes tend to be this num' % most_num) + logging.info('most num is %d, all classes tend to be this num' % most_num) locations = {} number = {} - for lab, num in c.most_common(): # find the index from y_train + for lab, num in c.most_common(): # find the index from y_train number[lab] = num - locations[lab] = np.where(np.array(y_train)==lab)[0] + locations[lab] = np.where(np.array(y_train) == lab)[0] if printable: - print('convert list(np.array) to dict format') + logging.info('convert list(np.array) to dict format') X = {} # convert list to dict for lab, num in number.items(): X[lab] = X_train[locations[lab]] # oversampling if printable: - print('start oversampling') + logging.info('start oversampling') for key in X: temp = X[key] while True: @@ -416,28 +465,28 @@ def class_balancing_oversample(X_train=None, y_train=None, printable=True): break X[key] = np.vstack((X[key], temp)) if printable: - print('first features of label 0 >', len(X[0][0])) - print('the occurrence num of each stage after oversampling') + logging.info('first features of label 0 > %d' % len(X[0][0])) + logging.info('the occurrence num of each stage after oversampling') for key in X: - print(key, len(X[key])) + logging.info("%s %d" % (key, len(X[key]))) if printable: - print('make each stage have same num of instances') + logging.info('make each stage have same num of instances') for key in X: - X[key] = X[key][0:most_num,:] - print(key, len(X[key])) + X[key] = X[key][0:most_num, :] + logging.info("%s %d" % (key, len(X[key]))) # convert dict to list if printable: - print('convert from dict to list 
format') + logging.info('convert from dict to list format') y_train = [] - X_train = np.empty(shape=(0,len(X[0][0]))) + X_train = np.empty(shape=(0, len(X[0][0]))) for key in X: - X_train = np.vstack( (X_train, X[key] ) ) + X_train = np.vstack((X_train, X[key])) y_train.extend([key for i in range(len(X[key]))]) - # print(len(X_train), len(y_train)) + # logging.info(len(X_train), len(y_train)) c = Counter(y_train) if printable: - print('the occurrence number of each stage after oversampling: %s' % c.most_common()) + logging.info('the occurrence number of each stage after oversampling: %s' % c.most_common()) # ================ End of Classes balancing return X_train, y_train @@ -450,110 +499,126 @@ def get_random_int(min=0, max=10, number=5, seed=None): --------- >>> r = get_random_int(min=0, max=10, number=5) ... [10, 2, 3, 3, 7] + """ rnd = random.Random() if seed: rnd = random.Random(seed) # return [random.randint(min,max) for p in range(0, number)] - return [rnd.randint(min,max) for p in range(0, number)] + return [rnd.randint(min, max) for p in range(0, number)] + def list_string_to_dict(string): """Inputs ``['a', 'b', 'c']``, returns ``{'a': 0, 'b': 1, 'c': 2}``.""" dictionary = {} for idx, c in enumerate(string): - dictionary.update({c:idx}) + dictionary.update({c: idx}) return dictionary -# -# def class_balancing_sequence_4D(X_train, y_train, sequence_length, model='downsampling' ,printable=True): -# ''' 输入、输出都是sequence format -# oversampling or downsampling -# ''' -# n_features = X_train.shape[2] -# # ======== Classes balancing for sequence -# if printable: -# print("Classes balancing for 4D sequence training examples...") -# from collections import Counter -# c = Counter(y_train) # Counter({2: 454, 4: 267, 3: 124, 1: 57, 0: 48}) -# if printable: -# print('the occurrence number of each stage: %s' % c.most_common()) -# print('the least Label %s have %s instances' % c.most_common()[-1]) -# print('the most Label %s have %s instances' % c.most_common(1)[0]) -# # print(c.most_common()) # [(2, 454), (4, 267), (3, 124), (1, 57), (0, 48)] -# most_num = c.most_common(1)[0][1] -# less_num = c.most_common()[-1][1] -# -# locations = {} -# number = {} -# for lab, num in c.most_common(): -# number[lab] = num -# locations[lab] = np.where(np.array(y_train)==lab)[0] -# # print(locations) -# # print(number) -# if printable: -# print(' convert list to dict') -# X = {} # convert list to dict -# ### a sequence -# for lab, _ in number.items(): -# X[lab] = np.empty(shape=(0,1,n_features,1)) # 4D -# for lab, _ in number.items(): -# #X[lab] = X_train[locations[lab] -# for l in locations[lab]: -# X[lab] = np.vstack((X[lab], X_train[l*sequence_length : (l+1)*(sequence_length)])) -# # X[lab] = X_train[locations[lab]*sequence_length : locations[lab]*(sequence_length+1)] # a sequence -# # print(X) -# -# if model=='oversampling': -# if printable: -# print(' oversampling -- most num is %d, all classes tend to be this num\nshuffle applied' % most_num) -# for key in X: -# temp = X[key] -# while True: -# if len(X[key]) >= most_num * sequence_length: # sequence -# break -# X[key] = np.vstack((X[key], temp)) -# # print(key, len(X[key])) -# if printable: -# print(' make each stage have same num of instances') -# for key in X: -# X[key] = X[key][0:most_num*sequence_length,:] # sequence -# if printable: -# print(key, len(X[key])) -# elif model=='downsampling': -# import random -# if printable: -# print(' downsampling -- less num is %d, all classes tend to be this num by randomly choice without replacement\nshuffle 
applied' % less_num) -# for key in X: -# # print(key, len(X[key]))#, len(X[key])/sequence_length) -# s_idx = [ i for i in range(int(len(X[key])/sequence_length))] -# s_idx = np.asarray(s_idx)*sequence_length # start index of sequnce in X[key] -# # print('s_idx',s_idx) -# r_idx = np.random.choice(s_idx, less_num, replace=False) # random choice less_num of s_idx -# # print('r_idx',r_idx) -# temp = X[key] -# X[key] = np.empty(shape=(0,1,n_features,1)) # 4D -# for idx in r_idx: -# X[key] = np.vstack((X[key], temp[idx:idx+sequence_length])) -# # print(key, X[key]) -# # np.random.choice(l, len(l), replace=False) -# else: -# raise Exception(' model should be oversampling or downsampling') -# -# # convert dict to list -# if printable: -# print(' convert dict to list') -# y_train = [] -# # X_train = np.empty(shape=(0,len(X[0][0]))) -# # X_train = np.empty(shape=(0,len(X[1][0]))) # 2D -# X_train = np.empty(shape=(0,1,n_features,1)) # 4D -# l_key = list(X.keys()) # shuffle -# random.shuffle(l_key) # shuffle -# # for key in X: # no shuffle -# for key in l_key: # shuffle -# X_train = np.vstack( (X_train, X[key] ) ) -# # print(len(X[key])) -# y_train.extend([key for i in range(int(len(X[key])/sequence_length))]) -# # print(X_train,y_train, type(X_train), type(y_train)) -# # ================ End of Classes balancing for sequence -# # print(X_train.shape, len(y_train)) -# return X_train, np.asarray(y_train) + +def exit_tensorflow(sess=None, port=6006): + """Close TensorFlow session, TensorBoard and Nvidia-process if available. + + Parameters + ---------- + sess : Session + TensorFlow Session. + port : int + TensorBoard port you want to close, `6006` as default. + + """ + text = "[TL] Close tensorboard and nvidia-process if available" + text2 = "[TL] Close tensorboard and nvidia-process not yet supported by this function (tl.ops.exit_tf) on " + if sess is not None: + sess.close() + # import time + # time.sleep(2) + if _platform == "linux" or _platform == "linux2": + logging.info('linux: %s' % text) + os.system('nvidia-smi') + os.system('fuser ' + str(port) + '/tcp -k') # kill tensorboard 6006 + os.system("nvidia-smi | grep python |awk '{print $3}'|xargs kill") # kill all nvidia-smi python process + _exit() + elif _platform == "darwin": + logging.info('OS X: %s' % text) + subprocess.Popen("lsof -i tcp:" + str(port) + " | grep -v PID | awk '{print $2}' | xargs kill", shell=True) # kill tensorboard + elif _platform == "win32": + logging.info(text2 + "Windows") + # TODO + else: + logging.info(text2 + _platform) + + +def open_tensorboard(log_dir='/tmp/tensorflow', port=6006): + """Open Tensorboard. 
+ + Parameters + ---------- + log_dir : str + Directory where your tensorboard logs are saved. + port : int + TensorBoard port you want to open, 6006 is the tensorboard default. + + """ + text = "[TL] Open tensorboard, go to localhost:" + str(port) + " to access" + text2 = " not yet supported by this function (tl.ops.open_tb)" + + if not tl.files.exists_or_mkdir(log_dir, verbose=False): + logging.info("[TL] Log directory was created at %s" % log_dir) + + if _platform == "linux" or _platform == "linux2": + logging.info('linux %s' % text2) + # TODO + elif _platform == "darwin": + logging.info('OS X: %s' % text) + subprocess.Popen( + sys.prefix + " | python -m tensorflow.tensorboard --logdir=" + log_dir + " --port=" + str(port), + shell=True) # open tensorboard in localhost:6006/ or whatever port you chose + elif _platform == "win32": + logging.info('Windows%s' % text2) + # TODO + else: + logging.info(_platform + text2) + + +def clear_all_placeholder_variables(printable=True): + """Clears all the placeholder variables of keep prob, + including keeping probabilities of all dropout, denoising, dropconnect etc. + + Parameters + ---------- + printable : boolean + If True, print all deleted variables. + + """ + logging.info('clear all .....................................') + gl = globals().copy() + for var in gl: + if var[0] == '_': continue + if 'func' in str(globals()[var]): continue + if 'module' in str(globals()[var]): continue + if 'class' in str(globals()[var]): continue + + if printable: + logging.info(" clear_all ------- %s" % str(globals()[var])) + + del globals()[var] + + +def set_gpu_fraction(gpu_fraction=0.3): + """Set the GPU memory fraction for the application. + + Parameters + ---------- + gpu_fraction : float + Fraction of GPU memory, (0 ~ 1] + + References + ---------- + - `TensorFlow using GPU `__ + + """ + logging.info("[TL]: GPU MEM Fraction %f" % gpu_fraction) + gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=gpu_fraction) + sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) + return sess diff --git a/tensorlayer/visualize.py b/tensorlayer/visualize.py index 86b65f07..12452a06 100644 --- a/tensorlayer/visualize.py +++ b/tensorlayer/visualize.py @@ -1,84 +1,120 @@ -#! /usr/bin/python # -*- coding: utf-8 -*- +import os import matplotlib -## use this, if you got the following error: # _tkinter.TclError: no display name and no $DISPLAY environment variable - # matplotlib.use('Agg') - import numpy as np -import os -from . import prepro - # save/read image(s) import scipy.misc +from . import _logging as logging +from . import prepro + +# Uncomment the following line if you got: _tkinter.TclError: no display name and no $DISPLAY environment variable +# matplotlib.use('Agg') + + def read_image(image, path=''): - """ Read one image. + """Read one image. Parameters ----------- - images : string, file name. - path : string, path. + image : str + The image file name. + path : str + The image folder path. + + Returns + ------- + numpy.array + The image. + """ return scipy.misc.imread(os.path.join(path, image)) + def read_images(img_list, path='', n_threads=10, printable=True): - """ Returns all images in list by given path and name of each image file. + """Returns all images in list by given path and name of each image file. Parameters ------------- - img_list : list of string, the image file names. - path : string, image folder path. - n_threads : int, number of thread to read image. - printable : bool, print infomation when reading images, default is True. 
+ img_list : list of str + The image file names. + path : str + The image folder path. + n_threads : int + The number of threads to read images. + printable : boolean + Whether to print information when reading images. + + Returns + ------- + list of numpy.array + The images. + """ imgs = [] for idx in range(0, len(img_list), n_threads): - b_imgs_list = img_list[idx : idx + n_threads] + b_imgs_list = img_list[idx:idx + n_threads] b_imgs = prepro.threading_data(b_imgs_list, fn=read_image, path=path) - # print(b_imgs.shape) + # logging.info(b_imgs.shape) imgs.extend(b_imgs) if printable: - print('read %d from %s' % (len(imgs), path)) + logging.info('read %d from %s' % (len(imgs), path)) return imgs + def save_image(image, image_path=''): - """Save one image. + """Save an image. Parameters ----------- - images : numpy array [w, h, c] - image_path : string. + image : numpy array + [w, h, c] + image_path : str + The save path. + """ - try: # RGB + try: # RGB scipy.misc.imsave(image_path, image) - except: # Greyscale - scipy.misc.imsave(image_path, image[:,:,0]) + except: # Greyscale + scipy.misc.imsave(image_path, image[:, :, 0]) + def save_images(images, size, image_path=''): - """Save mutiple images into one single image. + """Save multiple images into one single image. Parameters ----------- - images : numpy array [batch, w, h, c] - size : list of two int, row and column number. + images : numpy array + (batch, w, h, c) + size : list of 2 ints + Row and column number; the number of images should be equal to or less than size[0] * size[1]. - image_path : string. + image_path : str + The save path. + + Returns + ------- + numpy.array + The image. Examples --------- >>> images = np.random.rand(64, 100, 100, 3) >>> tl.visualize.save_images(images, [8, 8], 'temp.png') + """ + if len(images.shape) == 3: # Greyscale [batch, h, w] --> [batch, h, w, 1] + images = images[:, :, :, np.newaxis] + def merge(images, size): h, w = images.shape[1], images.shape[2] img = np.zeros((h * size[0], w * size[1], 3)) for idx, image in enumerate(images): i = idx % size[1] j = idx // size[1] - img[j*h:j*h+h, i*w:i*w+w, :] = image + img[j * h:j * h + h, i * w:i * w + w, :] = image return img def imsave(images, size, path): @@ -87,34 +123,45 @@ def imsave(images, size, path): assert len(images) <= size[0] * size[1], "number of images should be equal or less than size[0] * size[1] {}".format(len(images)) return imsave(images, size, image_path) -# for object detection -def draw_boxes_and_labels_to_image(image, classes=[], coords=[], - scores=[], classes_list=[], - is_center=True, is_rescale=True, save_name=None): - """ Draw bboxes and class labels on image. Return or save the image with bboxes, example in the docs of ``tl.prepro``. + +def draw_boxes_and_labels_to_image(image, classes=[], coords=[], scores=[], classes_list=[], is_center=True, is_rescale=True, save_name=None): + """Draw bboxes and class labels on image. Return or save the image with bboxes, example in the docs of ``tl.prepro``. Parameters ----------- - image : RGB image in numpy.array, [height, width, channel]. - classes : a list of class ID (int). - coords : a list of list for coordinates. - - Should be [x, y, x2, y2] (up-left and botton-right format) - - If [x_center, y_center, w, h] (set is_center to True). - scores : a list of score (float). (Optional) - classes_list : list of string, for converting ID to string on image. - is_center : boolean, defalt is True. 
- If coords is [x_center, y_center, w, h], set it to True for converting [x_center, y_center, w, h] to [x, y, x2, y2] (up-left and botton-right). - If coords is [x1, x2, y1, y2], set it to False. - is_rescale : boolean, defalt is True. - If True, the input coordinates are the portion of width and high, this API will scale the coordinates to pixel unit internally. - If False, feed the coordinates with pixel unit format. - save_name : None or string + image : numpy.array + The RGB image [height, width, channel]. + classes : list of int + A list of class ID (int). + coords : list of list of int + A list of lists of coordinates. + - Should be [x, y, x2, y2] (up-left and bottom-right format) + - If [x_center, y_center, w, h] (set is_center to True). + scores : list of float + A list of score (float). (Optional) + classes_list : list of str + For converting ID to string on image. + is_center : boolean + Whether the coordinates are [x_center, y_center, w, h] + - If coordinates are [x_center, y_center, w, h], set it to True for converting it to [x, y, x2, y2] (up-left and bottom-right) internally. + - If coordinates are [x1, x2, y1, y2], set it to False. + is_rescale : boolean + Whether to rescale the coordinates from pixel-unit format to ratio format. + - If True, the input coordinates are the portion of width and height, this API will scale the coordinates to pixel unit internally. + - If False, feed the coordinates with pixel unit format. + save_name : None or str The name of image file (i.e. image.png), if None, not to save image. + Returns + ------- + numpy.array + The saved image. + References ----------- - OpenCV rectangle and putText. - - `scikit-image `_. + - `scikit-image `__. + """ assert len(coords) == len(classes), "number of coordinates and classes are equal" if len(scores) > 0: @@ -122,8 +169,8 @@ def draw_boxes_and_labels_to_image(image, classes=[], coords=[], import cv2 - # image = copy.copy(image) # don't change the original image - image = image.copy() # don't change the original image, and avoid error https://stackoverflow.com/questions/30249053/python-opencv-drawing-errors-after-manipulating-array-with-numpy + # don't change the original image, and avoid error https://stackoverflow.com/questions/30249053/python-opencv-drawing-errors-after-manipulating-array-with-numpy + image = image.copy() imh, imw = image.shape[0:2] thick = int((imh + imw) // 430) @@ -134,92 +181,32 @@ def draw_boxes_and_labels_to_image(image, classes=[], coords=[], else: x, y, x2, y2 = coords[i] - if is_rescale: # scale back to pixel unit if the coords are the portion of width and high + if is_rescale: # scale back to pixel unit if the coords are the portion of width and height x, y, x2, y2 = prepro.obj_box_coord_scale_to_pixelunit([x, y, x2, y2], (imh, imw)) - cv2.rectangle(image, - (int(x), int(y)), (int(x2), int(y2)), # up-left and botton-right - [0,255,0], + cv2.rectangle( + image, + (int(x), int(y)), + (int(x2), int(y2)), # up-left and bottom-right + [0, 255, 0], thick) cv2.putText( image, classes_list[classes[i]] + ((" %.2f" % (scores[i])) if (len(scores) != 0) else " "), - (int(x), int(y)), # button left + (int(x), int(y)), # bottom left 0, - 1.5e-3 * imh, # bigger = larger font - [0,0,256], # self.meta['colors'][max_indx], - int(thick/2)+1) # bold + 1.5e-3 * imh, # bigger = larger font + [0, 0, 256], # self.meta['colors'][max_indx], + int(thick / 2) + 1) # bold if save_name is not None: # cv2.imwrite('_my.png', image) save_image(image, save_name) # if len(coords) == 0: - # print("draw_boxes_and_labels_to_image: 
no bboxes exist, cannot draw !") + # logging.info("draw_boxes_and_labels_to_image: no bboxes exist, cannot draw !") return image -# old APIs -def W(W=None, second=10, saveable=True, shape=[28,28], name='mnist', fig_idx=2396512): - """Visualize every columns of the weight matrix to a group of Greyscale img. - - Parameters - ---------- - W : numpy.array - The weight matrix - second : int - The display second(s) for the image(s), if saveable is False. - saveable : boolean - Save or plot the figure. - shape : a list with 2 int - The shape of feature image, MNIST is [28, 80]. - name : a string - A name to save the image, if saveable is True. - fig_idx : int - matplotlib figure index. - - Examples - -------- - >>> tl.visualize.W(network.all_params[0].eval(), second=10, saveable=True, name='weight_of_1st_layer', fig_idx=2012) - """ - import matplotlib.pyplot as plt - if saveable is False: - plt.ion() - fig = plt.figure(fig_idx) # show all feature images - size = W.shape[0] - n_units = W.shape[1] - - num_r = int(np.sqrt(n_units)) # 每行显示的个数 若25个hidden unit -> 每行显示5个 - num_c = int(np.ceil(n_units/num_r)) - count = int(1) - for row in range(1, num_r+1): - for col in range(1, num_c+1): - if count > n_units: - break - a = fig.add_subplot(num_r, num_c, count) - # ------------------------------------------------------------ - # plt.imshow(np.reshape(W[:,count-1],(28,28)), cmap='gray') - # ------------------------------------------------------------ - feature = W[:,count-1] / np.sqrt( (W[:,count-1]**2).sum()) - # feature[feature<0.0001] = 0 # value threshold - # if count == 1 or count == 2: - # print(np.mean(feature)) - # if np.std(feature) < 0.03: # condition threshold - # feature = np.zeros_like(feature) - # if np.mean(feature) < -0.015: # condition threshold - # feature = np.zeros_like(feature) - plt.imshow(np.reshape(feature ,(shape[0],shape[1])), - cmap='gray', interpolation="nearest")#, vmin=np.min(feature), vmax=np.max(feature)) - # plt.title(name) - # ------------------------------------------------------------ - # plt.imshow(np.reshape(W[:,count-1] ,(np.sqrt(size),np.sqrt(size))), cmap='gray', interpolation="nearest") - plt.gca().xaxis.set_major_locator(plt.NullLocator()) # distable tick - plt.gca().yaxis.set_major_locator(plt.NullLocator()) - count = count + 1 - if saveable: - plt.savefig(name+'.pdf',format='pdf') - else: - plt.draw() - plt.pause(second) def frame(I=None, second=5, saveable=True, name='frame', cmap=None, fig_idx=12836): """Display a frame(image). Make sure OpenAI Gym render() is disable before using it. @@ -227,14 +214,14 @@ def frame(I=None, second=5, saveable=True, name='frame', cmap=None, fig_idx=1283 Parameters ---------- I : numpy.array - The image + The image. second : int The display second(s) for the image(s), if saveable is False. saveable : boolean Save or plot the figure. - name : a string + name : str A name to save the image, if saveable is True. - cmap : None or string + cmap : None or str 'gray' for greyscale, None for default, etc. fig_idx : int matplotlib figure index. 
@@ -244,14 +231,15 @@ def frame(I=None, second=5, saveable=True, name='frame', cmap=None, fig_idx=1283 >>> env = gym.make("Pong-v0") >>> observation = env.reset() >>> tl.visualize.frame(observation) + """ import matplotlib.pyplot as plt if saveable is False: plt.ion() - fig = plt.figure(fig_idx) # show all feature images + fig = plt.figure(fig_idx) # show all feature images - if len(I.shape) and I.shape[-1]==1: # (10,10,1) --> (10,10) - I = I[:,:,0] + if len(I.shape) and I.shape[-1] == 1: # (10,10,1) --> (10,10) + I = I[:, :, 0] plt.imshow(I, cmap) plt.title(name) @@ -259,11 +247,12 @@ def frame(I=None, second=5, saveable=True, name='frame', cmap=None, fig_idx=1283 # plt.gca().yaxis.set_major_locator(plt.NullLocator()) if saveable: - plt.savefig(name+'.pdf',format='pdf') + plt.savefig(name + '.pdf', format='pdf') else: plt.draw() plt.pause(second) + def CNN2d(CNN=None, second=10, saveable=True, name='cnn', fig_idx=3119362): """Display a group of RGB or Greyscale CNN masks. @@ -275,58 +264,55 @@ def CNN2d(CNN=None, second=10, saveable=True, name='cnn', fig_idx=3119362): The display second(s) for the image(s), if saveable is False. saveable : boolean Save or plot the figure. - name : a string + name : str A name to save the image, if saveable is True. fig_idx : int - matplotlib figure index. + The matplotlib figure index. Examples -------- >>> tl.visualize.CNN2d(network.all_params[0].eval(), second=10, saveable=True, name='cnn1_mnist', fig_idx=2012) + """ import matplotlib.pyplot as plt - # print(CNN.shape) # (5, 5, 3, 64) + # logging.info(CNN.shape) # (5, 5, 3, 64) # exit() n_mask = CNN.shape[3] n_row = CNN.shape[0] n_col = CNN.shape[1] n_color = CNN.shape[2] row = int(np.sqrt(n_mask)) - col = int(np.ceil(n_mask/row)) - plt.ion() # active mode + col = int(np.ceil(n_mask / row)) + plt.ion() # active mode fig = plt.figure(fig_idx) count = 1 - for ir in range(1, row+1): - for ic in range(1, col+1): + for ir in range(1, row + 1): + for ic in range(1, col + 1): if count > n_mask: break a = fig.add_subplot(col, row, count) - # print(CNN[:,:,:,count-1].shape, n_row, n_col) # (5, 1, 32) 5 5 + # logging.info(CNN[:,:,:,count-1].shape, n_row, n_col) # (5, 1, 32) 5 5 # exit() # plt.imshow( # np.reshape(CNN[count-1,:,:,:], (n_row, n_col)), # cmap='gray', interpolation="nearest") # theano if n_color == 1: - plt.imshow( - np.reshape(CNN[:,:,:,count-1], (n_row, n_col)), - cmap='gray', interpolation="nearest") + plt.imshow(np.reshape(CNN[:, :, :, count - 1], (n_row, n_col)), cmap='gray', interpolation="nearest") elif n_color == 3: - plt.imshow( - np.reshape(CNN[:,:,:,count-1], (n_row, n_col, n_color)), - cmap='gray', interpolation="nearest") + plt.imshow(np.reshape(CNN[:, :, :, count - 1], (n_row, n_col, n_color)), cmap='gray', interpolation="nearest") else: raise Exception("Unknown n_color") - plt.gca().xaxis.set_major_locator(plt.NullLocator()) # distable tick + plt.gca().xaxis.set_major_locator(plt.NullLocator()) # disable ticks plt.gca().yaxis.set_major_locator(plt.NullLocator()) count = count + 1 if saveable: - plt.savefig(name+'.pdf',format='pdf') + plt.savefig(name + '.pdf', format='pdf') else: plt.draw() plt.pause(second) -def images2d(images=None, second=10, saveable=True, name='images', dtype=None, - fig_idx=3119362): + +def images2d(images=None, second=10, saveable=True, name='images', dtype=None, fig_idx=3119362): """Display a group of RGB or Greyscale images. 
Parameters @@ -337,7 +323,7 @@ def images2d(images=None, second=10, saveable=True, name='images', dtype=None, The display second(s) for the image(s), if saveable is False. saveable : boolean Save or plot the figure. - name : a string + name : str A name to save the image, if saveable is True. dtype : None or numpy data type The data type for displaying the images. @@ -348,9 +334,10 @@ -------- >>> X_train, y_train, X_test, y_test = tl.files.load_cifar10_dataset(shape=(-1, 32, 32, 3), plotable=False) >>> tl.visualize.images2d(X_train[0:100,:,:,:], second=10, saveable=False, name='cifar10', dtype=np.uint8, fig_idx=20212) + """ import matplotlib.pyplot as plt - # print(images.shape) # (50000, 32, 32, 3) + # logging.info(images.shape) # (50000, 32, 32, 3) # exit() if dtype: images = np.asarray(images, dtype=dtype) @@ -359,48 +346,45 @@ n_mask = images.shape[0] n_row = images.shape[1] n_col = images.shape[2] n_color = images.shape[3] row = int(np.sqrt(n_mask)) - col = int(np.ceil(n_mask/row)) - plt.ion() # active mode + col = int(np.ceil(n_mask / row)) + plt.ion() # active mode fig = plt.figure(fig_idx) count = 1 - for ir in range(1, row+1): - for ic in range(1, col+1): + for ir in range(1, row + 1): + for ic in range(1, col + 1): if count > n_mask: break a = fig.add_subplot(col, row, count) - # print(images[:,:,:,count-1].shape, n_row, n_col) # (5, 1, 32) 5 5 + # logging.info(images[:,:,:,count-1].shape, n_row, n_col) # (5, 1, 32) 5 5 # plt.imshow( # np.reshape(images[count-1,:,:,:], (n_row, n_col)), # cmap='gray', interpolation="nearest") # theano if n_color == 1: - plt.imshow( - np.reshape(images[count-1,:,:], (n_row, n_col)), - cmap='gray', interpolation="nearest") + plt.imshow(np.reshape(images[count - 1, :, :], (n_row, n_col)), cmap='gray', interpolation="nearest") # plt.title(name) elif n_color == 3: - plt.imshow(images[count-1,:,:], - cmap='gray', interpolation="nearest") + plt.imshow(images[count - 1, :, :], cmap='gray', interpolation="nearest") # plt.title(name) else: raise Exception("Unknown n_color") - plt.gca().xaxis.set_major_locator(plt.NullLocator()) # distable tick + plt.gca().xaxis.set_major_locator(plt.NullLocator()) # disable ticks plt.gca().yaxis.set_major_locator(plt.NullLocator()) count = count + 1 if saveable: - plt.savefig(name+'.pdf',format='pdf') + plt.savefig(name + '.pdf', format='pdf') else: plt.draw() plt.pause(second) -def tsne_embedding(embeddings, reverse_dictionary, plot_only=500, - second=5, saveable=False, name='tsne', fig_idx=9862): + +def tsne_embedding(embeddings, reverse_dictionary, plot_only=500, second=5, saveable=False, name='tsne', fig_idx=9862): """Visualize the embeddings by using t-SNE. Parameters ---------- - embeddings : a matrix - The images. - reverse_dictionary : a dictionary + embeddings : numpy.array + The embedding matrix. + reverse_dictionary : dictionary id_to_word, mapping id to unique word. plot_only : int The number of examples to plot, choosing the most common words. @@ -408,7 +392,7 @@ def tsne_embedding(embeddings, reverse_dictionary, plot_only=500, The display second(s) for the image(s), if saveable is False. saveable : boolean Save or plot the figure. - name : a string + name : str A name to save the image, if saveable is True. fig_idx : int matplotlib figure index. 
@@ -419,26 +403,22 @@ >>> final_embeddings = normalized_embeddings.eval() >>> tl.visualize.tsne_embedding(final_embeddings, reverse_dictionary, ... plot_only=500, second=5, saveable=False, name='tsne') + """ import matplotlib.pyplot as plt - def plot_with_labels(low_dim_embs, labels, figsize=(18, 18), second=5, - saveable=True, name='tsne', fig_idx=9862): + + def plot_with_labels(low_dim_embs, labels, figsize=(18, 18), second=5, saveable=True, name='tsne', fig_idx=9862): assert low_dim_embs.shape[0] >= len(labels), "More labels than embeddings" if saveable is False: plt.ion() plt.figure(fig_idx) plt.figure(figsize=figsize) #in inches for i, label in enumerate(labels): - x, y = low_dim_embs[i,:] + x, y = low_dim_embs[i, :] plt.scatter(x, y) - plt.annotate(label, - xy=(x, y), - xytext=(5, 2), - textcoords='offset points', - ha='right', - va='bottom') + plt.annotate(label, xy=(x, y), xytext=(5, 2), textcoords='offset points', ha='right', va='bottom') if saveable: - plt.savefig(name+'.pdf',format='pdf') + plt.savefig(name + '.pdf', format='pdf') else: plt.draw() plt.pause(second) @@ -450,12 +430,72 @@ def plot_with_labels(low_dim_embs, labels, figsize=(18, 18), second=5, tsne = TSNE(perplexity=30, n_components=2, init='pca', n_iter=5000) # plot_only = 500 - low_dim_embs = tsne.fit_transform(embeddings[:plot_only,:]) + low_dim_embs = tsne.fit_transform(embeddings[:plot_only, :]) labels = [reverse_dictionary[i] for i in xrange(plot_only)] plot_with_labels(low_dim_embs, labels, second=second, saveable=saveable, \ name=name, fig_idx=fig_idx) except ImportError: - print("Please install sklearn and matplotlib to visualize embeddings.") + logging.info("Please install sklearn and matplotlib to visualize embeddings.") + + +def draw_weights(W=None, second=10, saveable=True, shape=[28, 28], name='mnist', fig_idx=2396512): + """Visualize every column of the weight matrix as a group of greyscale images. + + Parameters + ---------- + W : numpy.array + The weight matrix. + second : int + The display second(s) for the image(s), if saveable is False. + saveable : boolean + Save or plot the figure. + shape : list of 2 int + The shape of the feature image, MNIST is [28, 28]. + name : str + A name to save the image, if saveable is True. + fig_idx : int + matplotlib figure index. 
+ Examples + -------- + >>> tl.visualize.draw_weights(network.all_params[0].eval(), second=10, saveable=True, name='weight_of_1st_layer', fig_idx=2012) + + """ + import matplotlib.pyplot as plt + if saveable is False: + plt.ion() + fig = plt.figure(fig_idx) # show all feature images + size = W.shape[0] + n_units = W.shape[1] -# + num_r = int(np.sqrt(n_units)) # number of units shown per row; e.g., 25 hidden units -> 5 per row + num_c = int(np.ceil(n_units / num_r)) + count = int(1) + for row in range(1, num_r + 1): + for col in range(1, num_c + 1): + if count > n_units: + break + a = fig.add_subplot(num_r, num_c, count) + # ------------------------------------------------------------ + # plt.imshow(np.reshape(W[:,count-1],(28,28)), cmap='gray') + # ------------------------------------------------------------ + feature = W[:, count - 1] / np.sqrt((W[:, count - 1]**2).sum()) + # feature[feature<0.0001] = 0 # value threshold + # if count == 1 or count == 2: + # print(np.mean(feature)) + # if np.std(feature) < 0.03: # condition threshold + # feature = np.zeros_like(feature) + # if np.mean(feature) < -0.015: # condition threshold + # feature = np.zeros_like(feature) + plt.imshow(np.reshape(feature, (shape[0], shape[1])), cmap='gray', interpolation="nearest") #, vmin=np.min(feature), vmax=np.max(feature)) + # plt.title(name) + # ------------------------------------------------------------ + # plt.imshow(np.reshape(W[:,count-1] ,(np.sqrt(size),np.sqrt(size))), cmap='gray', interpolation="nearest") + plt.gca().xaxis.set_major_locator(plt.NullLocator()) # disable ticks + plt.gca().yaxis.set_major_locator(plt.NullLocator()) + count = count + 1 + if saveable: + plt.savefig(name + '.pdf', format='pdf') + else: + plt.draw() + plt.pause(second) \ No newline at end of file
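For reference, a minimal usage sketch of the session utilities introduced in this patch (the GPU fraction and port values are illustrative, not defaults you must use): >>> import tensorflow as tf >>> import tensorlayer as tl >>> sess = tl.utils.set_gpu_fraction(gpu_fraction=0.5) # session that may use up to half of the GPU memory >>> # ... build and train the model with `sess` ... >>> tl.utils.exit_tensorflow(sess, port=6006) # close the session; on Linux/OS X, also kills TensorBoard on the port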
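Likewise, a sketch of the renamed image helpers that main.py now uses (file names are hypothetical, and all images must share one shape so they can be stacked into a grid): >>> import numpy as np >>> import tensorlayer as tl >>> imgs = tl.visualize.read_images(['0.png', '1.png'], path='samples/', n_threads=2) >>> tl.visualize.save_images(np.asarray(imgs), [1, 2], image_path='samples/grid.png')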