From e78536d2cda5b891489e7fe7b85edb5d394ffb3b Mon Sep 17 00:00:00 2001 From: mhuen Date: Sun, 21 Apr 2024 14:12:40 +0200 Subject: [PATCH 1/2] Expose seed parameter to weight initialization --- tfscripts/compat/v1/conv.py | 25 +++--- tfscripts/compat/v1/core.py | 26 ++++--- tfscripts/compat/v1/hex/conv.py | 64 ++++++++++++---- tfscripts/compat/v1/hex/icecube.py | 11 ++- tfscripts/compat/v1/hex/rotation.py | 114 ++++++++++++++++++++-------- tfscripts/compat/v1/hex/visual.py | 47 ++++++------ tfscripts/compat/v1/layers.py | 81 ++++++++++++++++---- tfscripts/compat/v1/weights.py | 71 ++++++++++++----- tfscripts/conv.py | 15 ++++ tfscripts/core.py | 27 +++++-- tfscripts/hex/conv.py | 49 ++++++++++-- tfscripts/hex/icecube.py | 15 +++- tfscripts/hex/rotation.py | 90 +++++++++++++++++----- tfscripts/hex/visual.py | 46 ++++++----- tfscripts/layers.py | 61 +++++++++++++-- tfscripts/model.py | 12 ++- tfscripts/utils.py | 14 ++++ tfscripts/weights.py | 72 ++++++++++++++---- 18 files changed, 635 insertions(+), 205 deletions(-) diff --git a/tfscripts/compat/v1/conv.py b/tfscripts/compat/v1/conv.py index 9bf6a5e..963734a 100644 --- a/tfscripts/compat/v1/conv.py +++ b/tfscripts/compat/v1/conv.py @@ -239,6 +239,7 @@ def locally_connected_2d( strides=[1, 1], padding="SAME", dilation_rate=None, + seed=None, ): """ Like conv2d, but doesn't share weights. @@ -261,10 +262,11 @@ def locally_connected_2d( The stride of the sliding window for each dimension of input. padding : A string from: "SAME", "VALID". The type of padding algorithm to use. - dilation_rate : None or list of int of length 2 [dilattion in x, dilation in y] defines dilattion rate to be used + seed : None or int + Seed for the random number generator. 
Returns ------- @@ -316,7 +318,9 @@ def locally_connected_2d( # fast shortcut if list(filter_size) == [1, 1]: if kernel is None: - kernel = new_weights(shape=input_shape[1:] + [num_outputs]) + kernel = new_weights( + shape=input_shape[1:] + [num_outputs], seed=seed + ) output = tf.reduce_sum( input_tensor=tf.expand_dims(input, axis=4) * kernel, axis=3 ) @@ -412,7 +416,7 @@ def locally_connected_2d( # get kernel # ------------------ if kernel is None: - kernel = new_weights(shape=kernel_shape) + kernel = new_weights(shape=kernel_shape, seed=seed) # ------------------ # perform convolution @@ -431,6 +435,7 @@ def locally_connected_3d( strides=[1, 1, 1], padding="SAME", dilation_rate=None, + seed=None, ): """ Like conv3d, but doesn't share weights. @@ -456,6 +461,8 @@ def locally_connected_3d( dilation_rate : None or list of int of length 3 [dilattion in x, dilation in y, dilation in z] defines dilattion rate to be used + seed : None or int + Seed for the random number generator. Returns ------- @@ -507,7 +514,9 @@ def locally_connected_3d( # fast shortcut if list(filter_size) == [1, 1, 1]: if kernel is None: - kernel = new_weights(shape=input_shape[1:] + [num_outputs]) + kernel = new_weights( + shape=input_shape[1:] + [num_outputs], seed=seed + ) output = tf.reduce_sum( input_tensor=tf.expand_dims(input, axis=5) * kernel, axis=4 ) @@ -628,7 +637,7 @@ def locally_connected_3d( # get kernel # ------------------ if kernel is None: - kernel = new_weights(shape=kernel_shape) + kernel = new_weights(shape=kernel_shape, seed=seed) # ------------------ # perform convolution @@ -663,18 +672,14 @@ def local_translational3d_trafo( float32, float64, int64, int32, uint8, uint16, int16, int8, complex64, complex128, qint8, quint8, qint32, half. Shape [batch, in_depth, in_height, in_width, in_channels]. 
- num_outputs : int Number of output channels - filter_size : list of int of size 3 [filter x size, filter y size, filter z size] - fcn : callable: fcn(input_patch) Defines the transformation: input_patch -> output with output.shape = [-1, num_outputs] - weights : None, optional Description strides : A list of ints that has length >= 5. 1-D tensor of length 5. @@ -682,11 +687,9 @@ def local_translational3d_trafo( Must have strides[0] = strides[4] = 1. padding : A string from: "SAME", "VALID". The type of padding algorithm to use. - dilation_rate :None or list of int of length 3 [dilattion in x, dilation in y, dilation in z] defines dilattion rate to be used - is_training : bool, optional Indicates whether currently in training or inference mode. True: in training mode diff --git a/tfscripts/compat/v1/core.py b/tfscripts/compat/v1/core.py index 204c39c..ef1ccc8 100644 --- a/tfscripts/compat/v1/core.py +++ b/tfscripts/compat/v1/core.py @@ -17,7 +17,12 @@ def add_residual( - input, residual, strides=None, use_scale_factor=True, scale_factor=0.001 + input, + residual, + strides=None, + use_scale_factor=True, + scale_factor=0.001, + seed=None, ): """Convenience function to add a residual @@ -39,6 +44,8 @@ def add_residual( scale_factor : float, optional Defines how much the residuals will be scaled prior to addition if use_scale_factor is True. + seed : int, optional + Seed for the random number generator. Returns ------- @@ -78,7 +85,7 @@ def add_residual( # Residuals added over multiple layers accumulate. 
# A scale factor < 1 reduces instabilities in beginning if use_scale_factor: - scale = new_weights([num_outputs], stddev=scale_factor) + scale = new_weights([num_outputs], stddev=scale_factor, seed=seed) residual = residual * scale if num_inputs == num_outputs: output = residual + input @@ -115,6 +122,7 @@ def activation( use_batch_normalisation=False, is_training=None, verbose=True, + seed=None, ): """ Helper-functions to perform activation on a layer @@ -218,7 +226,7 @@ def activation( ) elif activation_type == "prelu": - slope = new_weights(layer.get_shape().as_list()[1:]) + 1.0 + slope = new_weights(layer.get_shape().as_list()[1:], seed=seed) + 1.0 layer = tf.where( tf.less(layer, tf.constant(0, dtype=FLOAT_PRECISION)), layer * slope, @@ -226,8 +234,8 @@ def activation( ) elif activation_type == "pelu": - a = new_weights(layer.get_shape().as_list()[1:]) + 1.0 - b = new_weights(layer.get_shape().as_list()[1:]) + 1.0 + a = new_weights(layer.get_shape().as_list()[1:], seed=seed) + 1.0 + b = new_weights(layer.get_shape().as_list()[1:], seed=None if seed is None else seed + 1) + 1.0 layer = tf.where( tf.less(layer, tf.constant(0, dtype=FLOAT_PRECISION)), (tf.exp(layer / b) - 1) * a, @@ -238,10 +246,10 @@ def activation( layer = tf.exp(-tf.square(layer)) elif activation_type == "pgaussian": - sigma = new_weights(layer.get_shape().as_list()[1:]) + tf.constant( - 1.0, dtype=FLOAT_PRECISION - ) - mu = new_weights(layer.get_shape().as_list()[1:]) + sigma = new_weights( + layer.get_shape().as_list()[1:], seed=seed + ) + tf.constant(1.0, dtype=FLOAT_PRECISION) + mu = new_weights(layer.get_shape().as_list()[1:], seed=None if seed is None else seed + 1) layer = tf.exp( tf.square((layer - mu) / sigma) * tf.constant(-0.5, dtype=FLOAT_PRECISION) diff --git a/tfscripts/compat/v1/hex/conv.py b/tfscripts/compat/v1/hex/conv.py index d3e4081..7565874 100644 --- a/tfscripts/compat/v1/hex/conv.py +++ b/tfscripts/compat/v1/hex/conv.py @@ -12,6 +12,7 @@ import tensorflow as tf # tfscripts.compat.v1 specific imports +from tfscripts.utils
import SeedCounter from tfscripts.compat.v1.weights import ( new_weights, new_biases, @@ -74,7 +75,7 @@ def hex_distance(h1, h2): return (abs(a1 - a2) + abs(b1 - b2) + abs(c1 - c2)) / 2 -def get_hex_kernel(filter_size, print_kernel=False, get_ones=False): +def get_hex_kernel(filter_size, print_kernel=False, get_ones=False, seed=None): """Get hexagonal convolution kernel Create Weights for a hexagonal kernel. @@ -126,12 +127,13 @@ def get_hex_kernel(filter_size, print_kernel=False, get_ones=False): 1 represents a trainable Tensor of shape filter_size[2:] This can be used to verify the shape of the hex kernel False: do not print - get_ones : bool, optional If True, returns constant ones for elements in hexagon. If False, return trainable tf.tensor for elements in hexagon. In both cases, constant zeros are returned for elements outside of hexagon. + seed : int, optional + Seed for the random number generator. Returns ------- @@ -147,6 +149,9 @@ def get_hex_kernel(filter_size, print_kernel=False, get_ones=False): ValueError Description """ + # create seed counter + cnt = SeedCounter(seed) + k = filter_size[0] x = filter_size[1] @@ -180,7 +185,7 @@ def get_hex_kernel(filter_size, print_kernel=False, get_ones=False): if get_ones: weights = ones else: - weights = new_weights(filter_size[2:]) + weights = new_weights(filter_size[2:], seed=cnt()) test_hex_dict[(a, b)] = 1 # ------------------------- @@ -219,7 +224,7 @@ def get_hex_kernel(filter_size, print_kernel=False, get_ones=False): if get_ones: weights = ones else: - weights = new_weights(filter_size[2:]) + weights = new_weights(filter_size[2:], seed=cnt()) test_hex_dict[(a, b)] = 1 else: weights = zeros @@ -244,6 +249,7 @@ def conv_hex( zero_out=False, kernel=None, azimuth=None, + seed=None, ): """Convolve a hex2d or hex3d layer (2d hex + 1d cartesian) @@ -310,6 +316,8 @@ def conv_hex( azimuth : float or scalar float tf.Tensor Hexagonal kernel is turned by the angle 'azimuth' [given in degrees] in counterclockwise 
direction + seed : int, optional + Seed for the random number generator. Returns ------- @@ -331,16 +339,21 @@ def conv_hex( if kernel is None: if azimuth is not None and filter_size[:2] != [1, 0]: kernel = rotation.get_dynamic_rotation_hex_kernel( - filter_size + [num_channels, num_filters], azimuth + filter_size + [num_channels, num_filters], + azimuth, + seed=seed, ) else: if num_rotations > 1: kernel = rotation.get_rotated_hex_kernel( - filter_size + [num_channels, num_filters], num_rotations + filter_size + [num_channels, num_filters], + num_rotations, + seed=seed, ) else: kernel = get_hex_kernel( - filter_size + [num_channels, num_filters] + filter_size + [num_channels, num_filters], + seed=seed, ) if azimuth is not None and filter_size[:2] != [1, 0]: @@ -368,7 +381,9 @@ def conv_hex( print("Assuming IceCube shape for layer", result) zero_out_matrix = get_icecube_kernel( - result.get_shape().as_list()[3:], get_ones=True + result.get_shape().as_list()[3:], + get_ones=True, + seed=seed, ) result = result * zero_out_matrix @@ -382,6 +397,7 @@ def conv_hex( num_filters * num_rotations, ], get_ones=True, + seed=seed, ) if result.get_shape()[1:] == zero_out_matrix.get_shape(): result = result * zero_out_matrix @@ -415,6 +431,7 @@ def conv_hex4d( azimuth=None, stack_axis=None, zero_out=False, + seed=None, ): """Convolve a hex4d layer (2d hex + 1d cartesian + 1d time) @@ -486,6 +503,8 @@ def conv_hex4d( If True, elements in result tensor which are not part of hexagon or IceCube strings (if shape in x and y dimensions is 10x10), will be set to zero. + seed : int, optional + Seed for the random number generator. 
Returns ------- @@ -502,16 +521,21 @@ def conv_hex4d( num_in_channels = input_data.get_shape().as_list()[5] if azimuth is not None: kernel = rotation.get_dynamic_rotation_hex_kernel( - filter_size + [num_in_channels, num_filters], azimuth + filter_size + [num_in_channels, num_filters], + azimuth, + seed=seed, ) else: if num_rotations > 1: kernel = rotation.get_rotated_hex_kernel( - filter_size + [num_in_channels, num_filters], num_rotations + filter_size + [num_in_channels, num_filters], + num_rotations, + seed=seed, ) else: kernel = get_hex_kernel( - filter_size + [num_in_channels, num_filters] + filter_size + [num_in_channels, num_filters], + seed=seed, ) # convolve with tf conv4d_stacked @@ -535,6 +559,7 @@ def conv_hex4d( num_filters * num_rotations, ], get_ones=True, + seed=seed, ) if result.get_shape()[1:] == zero_out_matrix.get_shape(): @@ -557,6 +582,7 @@ def create_conv_hex_layers_weights( num_filters_list, num_rotations_list=1, azimuth_list=None, + seed=None, ): """Create weights and biases for conv hex n-dimensional layers with n >= 2 @@ -612,12 +638,16 @@ def create_conv_hex_layers_weights( If only a single azimuth angle is given, the same rotation is used for all layers. If azimuth is None, the hexagonal kernel is not rotated. + seed : int, optional + Seed for the random number generator. 
Returns ------- list of tf.Tensor, list of tf.Tensor Returns the list of weight and bias tensors for each layer """ + # create seed counter + cnt = SeedCounter(seed) # create num_rotations_list if isinstance(num_rotations_list, int): @@ -638,21 +668,27 @@ def create_conv_hex_layers_weights( ): if azimuth is not None: kernel = rotation.get_dynamic_rotation_hex_kernel( - filter_size, azimuth + filter_size, + azimuth, + seed=cnt(), ) else: if num_rotations > 1: kernel = rotation.get_rotated_hex_kernel( filter_size + [num_input_channels, num_filters], num_rotations, + seed=cnt(), ) else: kernel = get_hex_kernel( - filter_size + [num_input_channels, num_filters] + filter_size + [num_input_channels, num_filters], + seed=cnt(), ) weights_list.append(kernel) - biases_list.append(new_biases(length=num_filters * num_rotations)) + biases_list.append( + new_biases(length=num_filters * num_rotations, seed=cnt()) + ) num_input_channels = num_filters return weights_list, biases_list diff --git a/tfscripts/compat/v1/hex/icecube.py b/tfscripts/compat/v1/hex/icecube.py index 2f46882..218f4bd 100644 --- a/tfscripts/compat/v1/hex/icecube.py +++ b/tfscripts/compat/v1/hex/icecube.py @@ -7,6 +7,7 @@ import tensorflow as tf # tfscripts.compat.v1 specific imports +from tfscripts.utils import SeedCounter from tfscripts.compat.v1.weights import new_weights # constants @@ -238,7 +239,7 @@ def get_icecube_string_from_hex_coord(a, b): return hex_string_coord_dict[(a, b)] -def get_icecube_kernel(shape, get_ones=False): +def get_icecube_kernel(shape, get_ones=False, seed=None): """ Get a kernel of shape 'shape' for IceCube where coordinates of no real strings are set to constant zeros. @@ -247,17 +248,21 @@ def get_icecube_kernel(shape, get_ones=False): ---------- shape : list of int The shape of the desired kernel. - get_ones : bool, optional If True, returns constant ones for real DOMs, zeros for virtual DOMs. 
If False, return trainable parameter for real DOMs, zeros for virtual DOMs + seed : int, optional + Seed for the random number generator. Returns ------- tf.Tensor The icecube kernel with the desired shape. """ + # create seed counter + cnt = SeedCounter(seed) + zeros = tf.zeros(shape, dtype=FLOAT_PRECISION) ones = tf.ones(shape, dtype=FLOAT_PRECISION) @@ -272,7 +277,7 @@ def get_icecube_kernel(shape, get_ones=False): if get_ones: weights = ones else: - weights = new_weights(shape) + weights = new_weights(shape, seed=cnt()) else: # virtual string, string does not actually exist weights = zeros diff --git a/tfscripts/compat/v1/hex/rotation.py b/tfscripts/compat/v1/hex/rotation.py index c787b11..cbe6048 100644 --- a/tfscripts/compat/v1/hex/rotation.py +++ b/tfscripts/compat/v1/hex/rotation.py @@ -12,6 +12,8 @@ import numpy as np import tensorflow as tf +from tfscripts.utils import SeedCounter + # tfscripts.compat.v1 specific imports from tfscripts.compat.v1.weights import new_weights @@ -95,7 +97,7 @@ def tf_get_rotated_corner_weights(corner_weights, azimuth): return rotatedcorner_weights -def get_dynamic_rotation_hex_kernel(filter_size, azimuth): +def get_dynamic_rotation_hex_kernel(filter_size, azimuth, seed=None): """Dynamically azimuthally rotated hexagonal kernels. Create Weights for a hexagonal kernel. @@ -131,6 +133,8 @@ def get_dynamic_rotation_hex_kernel(filter_size, azimuth): azimuth : tf tensor A scalar float tf.Tensor denoting the angle by which the kernel will be dynamically rotated. Azimuth angle is given in degrees. + seed : int, optional + Seed for the random number generator. 
Returns ------- @@ -147,51 +151,60 @@ def get_dynamic_rotation_hex_kernel(filter_size, azimuth): Description """ + # create seed counter + cnt = SeedCounter(seed) + no_of_dims = len(filter_size) Z = tf.zeros( [tf.shape(input=azimuth)[0]] + filter_size[2:], dtype=FLOAT_PRECISION ) - center_weight = new_weights([1] + filter_size[2:]) + center_weight = new_weights([1] + filter_size[2:], seed=cnt()) multiples = [tf.shape(input=azimuth)[0]] + [1] * (no_of_dims - 2) center_weight = tf.tile(center_weight, multiples) # HARDCODE MAGIC... ToDo: Generalize if filter_size[0:2] == [2, 0]: # hexagonal 2,0 Filter - corner_weights1 = new_weights([6] + filter_size[2:]) + corner_weights1 = new_weights([6] + filter_size[2:], seed=cnt()) elif filter_size[0:2] == [2, 1]: # hexagonal 2,1 Filter - corner_weights1 = new_weights([6] + filter_size[2:]) + corner_weights1 = new_weights([6] + filter_size[2:], seed=cnt()) corner_weights2 = [] for i in range(6): - corner_weights2.extend([Z, new_weights(filter_size[2:])]) + corner_weights2.extend( + [Z, new_weights(filter_size[2:], seed=cnt())] + ) corner_weights2 = tf.stack(corner_weights2) elif filter_size[0:2] == [3, 0]: # hexagonal 3,0 Filter - corner_weights1 = new_weights([6] + filter_size[2:]) - corner_weights2 = new_weights([12] + filter_size[2:]) + corner_weights1 = new_weights([6] + filter_size[2:], seed=cnt()) + corner_weights2 = new_weights([12] + filter_size[2:], seed=cnt()) elif filter_size[0:2] == [3, 1]: # hexagonal 3,1 Filter - corner_weights1 = new_weights([6] + filter_size[2:]) - corner_weights2 = new_weights([12] + filter_size[2:]) + corner_weights1 = new_weights([6] + filter_size[2:], seed=cnt()) + corner_weights2 = new_weights([12] + filter_size[2:], seed=cnt()) corner_weights3 = [] for i in range(6): - corner_weights3.extend([Z, new_weights(filter_size[2:]), Z]) + corner_weights3.extend( + [Z, new_weights(filter_size[2:], seed=cnt()), Z] + ) corner_weights3 = tf.stack(corner_weights3) elif filter_size[0:2] == [3, 2]: #
hexagonal 3,2 Filter - corner_weights1 = new_weights([6] + filter_size[2:]) - corner_weights2 = new_weights([12] + filter_size[2:]) + corner_weights1 = new_weights([6] + filter_size[2:], seed=cnt()) + corner_weights2 = new_weights([12] + filter_size[2:], seed=cnt()) corner_weights3 = [] for i in range(6): - corner_weights3.extend([Z, Z, new_weights(filter_size[2:])]) + corner_weights3.extend( + [Z, Z, new_weights(filter_size[2:], seed=cnt())] + ) corner_weights3 = tf.stack(corner_weights3) elif filter_size[0:2] == [4, 0]: # hexagonal 4,0 Filter - corner_weights1 = new_weights([6] + filter_size[2:]) - corner_weights2 = new_weights([12] + filter_size[2:]) - corner_weights2 = new_weights([18] + filter_size[2:]) + corner_weights1 = new_weights([6] + filter_size[2:], seed=cnt()) + corner_weights2 = new_weights([12] + filter_size[2:], seed=cnt()) + corner_weights2 = new_weights([18] + filter_size[2:], seed=cnt()) else: raise ValueError( "get_dynamic_rotation_hex_kernel: Unsupported " @@ -270,7 +283,7 @@ def get_dynamic_rotation_hex_kernel(filter_size, azimuth): # ------------------------------------------------------------------------- # hexagonal azimuth rotated filters # ------------------------------------------------------------------------- -def get_rotated_hex_kernel(filter_size, num_rotations): +def get_rotated_hex_kernel(filter_size, num_rotations, seed=None): """ Create Weights for a hexagonal kernel. The kernel is rotated 'num_rotations' many times. @@ -306,6 +319,8 @@ def get_rotated_hex_kernel(filter_size, num_rotations): num_rotations : int. number of rotational kernels to create. Kernels will be rotated by 360 degrees / num_rotations + seed : int, optional + Seed for the random number generator.
Returns ------- @@ -323,44 +338,75 @@ def get_rotated_hex_kernel(filter_size, num_rotations): Description """ + # create seed counter + cnt = SeedCounter(seed) + no_of_dims = len(filter_size) azimuths = np.linspace(0, 360, num_rotations + 1)[:-1] Z = tf.zeros(filter_size[2:-2], dtype=FLOAT_PRECISION) - center_weight = new_weights(filter_size[2:-2]) + center_weight = new_weights(filter_size[2:-2], seed=cnt()) # HARDCODE MAGIC... ToDo: Generalize if filter_size[0:2] == [2, 0]: # hexagonal 2,0 Filter - corner_weights1 = [new_weights(filter_size[2:-2]) for i in range(6)] + corner_weights1 = [ + new_weights(filter_size[2:-2], seed=cnt()) for i in range(6) + ] elif filter_size[0:2] == [2, 1]: # hexagonal 2,1 Filter - corner_weights1 = [new_weights(filter_size[2:-2]) for i in range(6)] + corner_weights1 = [ + new_weights(filter_size[2:-2], seed=cnt()) for i in range(6) + ] corner_weights2 = [] for i in range(6): - corner_weights2.extend([Z, new_weights(filter_size[2:-2])]) + corner_weights2.extend( + [Z, new_weights(filter_size[2:-2], seed=cnt())] + ) elif filter_size[0:2] == [3, 0]: # hexagonal 3,0 Filter - corner_weights1 = [new_weights(filter_size[2:-2]) for i in range(6)] - corner_weights2 = [new_weights(filter_size[2:-2]) for i in range(12)] + corner_weights1 = [ + new_weights(filter_size[2:-2], seed=cnt()) for i in range(6) + ] + corner_weights2 = [ + new_weights(filter_size[2:-2], seed=cnt()) for i in range(12) + ] elif filter_size[0:2] == [3, 1]: # hexagonal 3,1 Filter - corner_weights1 = [new_weights(filter_size[2:-2]) for i in range(6)] - corner_weights2 = [new_weights(filter_size[2:-2]) for i in range(12)] + corner_weights1 = [ + new_weights(filter_size[2:-2], seed=cnt()) for i in range(6) + ] + corner_weights2 = [ + new_weights(filter_size[2:-2], seed=cnt()) for i in range(12) + ] corner_weights3 = [] for i in range(6): - corner_weights3.extend([Z, new_weights(filter_size[2:-2]), Z]) + corner_weights3.extend( + [Z, new_weights(filter_size[2:-2], seed=cnt()), 
Z] + ) elif filter_size[0:2] == [3, 2]: # hexagonal 3,2 Filter - corner_weights1 = [new_weights(filter_size[2:-2]) for i in range(6)] - corner_weights2 = [new_weights(filter_size[2:-2]) for i in range(12)] + corner_weights1 = [ + new_weights(filter_size[2:-2], seed=cnt()) for i in range(6) + ] + corner_weights2 = [ + new_weights(filter_size[2:-2], seed=cnt()) for i in range(12) + ] corner_weights3 = [] for i in range(6): - corner_weights3.extend([Z, Z, new_weights(filter_size[2:-2])]) + corner_weights3.extend( + [Z, Z, new_weights(filter_size[2:-2], seed=cnt())] + ) elif filter_size[0:2] == [4, 0]: # hexagonal 4,0 Filter - corner_weights1 = [new_weights(filter_size[2:-2]) for i in range(6)] - corner_weights2 = [new_weights(filter_size[2:-2]) for i in range(12)] - corner_weights3 = [new_weights(filter_size[2:-2]) for i in range(18)] + corner_weights1 = [ + new_weights(filter_size[2:-2], seed=cnt()) for i in range(6) + ] + corner_weights2 = [ + new_weights(filter_size[2:-2], seed=cnt()) for i in range(12) + ] + corner_weights3 = [ + new_weights(filter_size[2:-2], seed=cnt()) for i in range(18) + ] else: raise ValueError( "get_rotated_hex_kernel: Unsupported " @@ -368,7 +414,9 @@ def get_rotated_hex_kernel(filter_size, num_rotations): ) rotated_kernels = [] - in_out_channel_weights = new_weights([num_rotations] + filter_size[-2:]) + in_out_channel_weights = new_weights( + [num_rotations] + filter_size[-2:], seed=cnt() + ) for i, azimuth in enumerate(azimuths): rotated_kernel_rows = [] diff --git a/tfscripts/compat/v1/hex/visual.py b/tfscripts/compat/v1/hex/visual.py index b56ce5d..9c90d3b 100644 --- a/tfscripts/compat/v1/hex/visual.py +++ b/tfscripts/compat/v1/hex/visual.py @@ -164,7 +164,10 @@ def plot_hex2D(hex_grid, file=None, hex_grid_spacing=1.0, norm="linear"): def visualize_rotated_hex_kernel( - filter_size, num_rotations, file="Rotation_{azimuth:2.2f}.png" + filter_size, + num_rotations, + file="Rotation_{azimuth:2.2f}.png", + seed=None, ): """Visualize 
hexagonal azimuth rotated filters @@ -185,66 +188,60 @@ def visualize_rotated_hex_kernel( filter_size = [s, o, 3. dim(e.g. z), 4. dim(e.g. t),...] s: size of hexagon o: orientation of hexagon - num_rotations : int. number of rotational kernels to create. Kernels will be rotated by 360 degrees / num_rotations - - file : str, optional A file pattern to which the plots of the rotated kernels will be saved to. The file pattern is formatted with a keyword 'azimuth' which holds the current azimuth rotation. + seed : int, optional + Seed for random number generator Raises ------ ValueError Description """ + rng = np.random.RandomState(seed) azimuths = np.linspace(0, 360, num_rotations + 1)[:-1] Z = 0 - center_weight = np.random.uniform(1, high=15, size=1) + center_weight = rng.uniform(1, high=15, size=1) # HARDCODE MAGIC... ToDo: Generalize if filter_size[0:2] == [2, 0]: # hexagonal 2,0 Filter - corner_weights1 = np.random.uniform(1, high=15, size=6) + corner_weights1 = rng.uniform(1, high=15, size=6) elif filter_size[0:2] == [2, 1]: # hexagonal 2,1 Filter - corner_weights1 = np.random.uniform(1, high=15, size=6) + corner_weights1 = rng.uniform(1, high=15, size=6) corner_weights2 = [] for i in range(6): - corner_weights2.extend( - [Z, np.random.uniform(1, high=15, size=1)[0]] - ) + corner_weights2.extend([Z, rng.uniform(1, high=15, size=1)[0]]) elif filter_size[0:2] == [3, 0]: # hexagonal 3,0 Filter - corner_weights1 = np.random.uniform(1, high=15, size=6) - corner_weights2 = np.random.uniform(1, high=15, size=12) + corner_weights1 = rng.uniform(1, high=15, size=6) + corner_weights2 = rng.uniform(1, high=15, size=12) elif filter_size[0:2] == [3, 1]: # hexagonal 3,1 Filter - corner_weights1 = np.random.uniform(1, high=15, size=6) - corner_weights2 = np.random.uniform(1, high=15, size=12) + corner_weights1 = rng.uniform(1, high=15, size=6) + corner_weights2 = rng.uniform(1, high=15, size=12) corner_weights3 = [] for i in range(6): - corner_weights3.extend( - [Z, 
np.random.uniform(1, high=15, size=1)[0], Z] - ) + corner_weights3.extend([Z, rng.uniform(1, high=15, size=1)[0], Z]) elif filter_size[0:2] == [3, 2]: # hexagonal 3,2 Filter - corner_weights1 = np.random.uniform(1, high=15, size=6) - corner_weights2 = np.random.uniform(1, high=15, size=12) + corner_weights1 = rng.uniform(1, high=15, size=6) + corner_weights2 = rng.uniform(1, high=15, size=12) corner_weights3 = [] for i in range(6): - corner_weights3.extend( - [Z, Z, np.random.uniform(1, high=15, size=1)[0]] - ) + corner_weights3.extend([Z, Z, rng.uniform(1, high=15, size=1)[0]]) elif filter_size[0:2] == [4, 0]: # hexagonal 4,0 Filter - corner_weights1 = np.random.uniform(1, high=15, size=6) - corner_weights2 = np.random.uniform(1, high=15, size=12) - corner_weights3 = np.random.uniform(1, high=15, size=18) + corner_weights1 = rng.uniform(1, high=15, size=6) + corner_weights2 = rng.uniform(1, high=15, size=12) + corner_weights3 = rng.uniform(1, high=15, size=18) else: raise ValueError( "visualize_rotated_hex_kernel: Unsupported hexagonal " diff --git a/tfscripts/compat/v1/layers.py b/tfscripts/compat/v1/layers.py index 7dcf72d..a2bdeec 100644 --- a/tfscripts/compat/v1/layers.py +++ b/tfscripts/compat/v1/layers.py @@ -8,6 +8,7 @@ import tensorflow as tf # tfscripts.compat.v1 specific imports +from tfscripts.utils import SeedCounter from tfscripts.compat.v1.weights import ( new_weights, new_biases, @@ -146,6 +147,7 @@ def new_conv_nd_layer( hex_num_rotations=1, hex_azimuth=None, hex_zero_out=False, + seed=None, ): """Helper-function for creating a new n-dim Convolutional Layer @@ -276,6 +278,8 @@ def new_conv_nd_layer( If True, elements in result tensor which are not part of hexagon or IceCube strings (if shape in x and y dimensions is 10x10), will be set to zero. + seed : None or int, optional + The seed to be used for random initialisation of weights. 
Returns ------- @@ -292,6 +296,8 @@ def new_conv_nd_layer( ValueError Description """ + # create seed counter + cnt = SeedCounter(seed) # check dimension of input num_dims = len(input.shape) - 2 @@ -342,11 +348,11 @@ def new_conv_nd_layer( # Create new weights aka. filters with the given shape. if method.lower() == "convolution": if weights is None: - weights = new_weights(shape=shape) + weights = new_weights(shape=shape, seed=cnt()) # Create new biases, one for each filter. if biases is None: - biases = new_biases(length=num_filters) + biases = new_biases(length=num_filters, seed=cnt()) # ------------------- # Perform convolution @@ -385,6 +391,7 @@ def new_conv_nd_layer( dilation_rate=dilation_rate, zero_out=hex_zero_out, kernel=weights, + seed=cnt(), ) elif num_dims == 4: layer, weights = hx.conv_hex4d( @@ -398,11 +405,14 @@ def new_conv_nd_layer( dilation_rate=dilation_rate, zero_out=hex_zero_out, kernel=weights, + seed=cnt(), ) # Create new biases, one for each filter. if biases is None: - biases = new_biases(length=num_filters * hex_num_rotations) + biases = new_biases( + length=num_filters * hex_num_rotations, seed=cnt() + ) # ------------------- # locally connected @@ -423,6 +433,7 @@ def new_conv_nd_layer( strides=strides[1:-1], padding=padding, dilation_rate=dilation_rate, + seed=cnt(), ) elif num_dims == 3: layer, weights = conv.locally_connected_3d( @@ -432,12 +443,13 @@ def new_conv_nd_layer( strides=strides[1:-1], padding=padding, dilation_rate=dilation_rate, + seed=cnt(), ) elif num_dims == 4: raise NotImplementedError("4D locally connected not implemented!") # Create new biases, one for each filter and position - biases = new_weights(shape=layer.get_shape().as_list()[1:]) + biases = new_weights(shape=layer.get_shape().as_list()[1:], seed=cnt()) # ------------------- # local trafo @@ -481,7 +493,7 @@ def new_conv_nd_layer( raise NotImplementedError("4D dynamic_convolution not implemented") if biases is None: - biases = new_biases(length=num_filters) 
+ biases = new_biases(length=num_filters, seed=cnt()) else: raise ValueError("Unknown method: {!r}".format(method)) @@ -524,12 +536,18 @@ def new_conv_nd_layer( # Apply activation and batch normalisation layer = core.activation( - layer, activation, use_batch_normalisation, is_training + layer, + activation, + use_batch_normalisation, + is_training, + seed=cnt(), ) # Use as Residual if use_residual: - layer = core.add_residual(input=input, residual=layer, strides=strides) + layer = core.add_residual( + input=input, residual=layer, strides=strides, seed=cnt() + ) # Use pooling to down-sample the image resolution? if num_dims == 2: @@ -588,6 +606,7 @@ def new_fc_layer( weights=None, biases=None, max_out_size=None, + seed=None, ): """ Helper-function for creating a new Fully-Connected Layer @@ -625,6 +644,8 @@ def new_fc_layer( max_out_size : None or int, optional The max_out_size for the layer. If None, no max_out is used in the layer. + seed : None or int, optional + The seed to be used for random initialisation of weights. Returns ------- @@ -636,9 +657,9 @@ def new_fc_layer( # Create new weights and biases. if weights is None: - weights = new_weights(shape=[num_inputs, num_outputs]) + weights = new_weights(shape=[num_inputs, num_outputs], seed=seed) if biases is None: - biases = new_biases(length=num_outputs) + biases = new_biases(length=num_outputs, seed=seed) # Calculate the layer as the matrix multiplication of # the input and weights, and then add the bias-values. 
@@ -651,7 +672,11 @@ def new_fc_layer( # Apply activation and batch normalisation layer = core.activation( - layer, activation, use_batch_normalisation, is_training + layer, + activation, + use_batch_normalisation, + is_training, + seed=seed, ) if max_out_size is not None: @@ -678,6 +703,7 @@ def new_fc_layer( input=input, residual=layer, strides=res_strides, + seed=seed, ) if use_dropout: @@ -698,6 +724,7 @@ def new_channel_wise_fc_layer( weights=None, biases=None, max_out_size=None, + seed=None, ): """ Helper-function for creating a new channel wise Fully-Connected Layer @@ -735,6 +762,8 @@ def new_channel_wise_fc_layer( max_out_size : None or int, optional The max_out_size for the layer. If None, no max_out is used in the layer. + seed : None or int, optional + The seed to be used for random initialisation of weights. Returns ------- @@ -759,9 +788,11 @@ def new_channel_wise_fc_layer( # Create new weights and biases. if weights is None: - weights = new_weights(shape=[num_channels, num_inputs, num_outputs]) + weights = new_weights( + shape=[num_channels, num_inputs, num_outputs], seed=seed + ) if biases is None: - biases = new_weights(shape=[num_outputs, num_channels]) + biases = new_weights(shape=[num_outputs, num_channels], seed=seed) # Calculate the layer as the matrix multiplication of # the input and weights, and then add the bias-values. 
@@ -777,7 +808,11 @@ def new_channel_wise_fc_layer( # Apply activation and batch normalisation layer = core.activation( - layer, activation, use_batch_normalisation, is_training + layer, + activation, + use_batch_normalisation, + is_training, + seed=seed, ) # Use as Residual @@ -785,7 +820,9 @@ def new_channel_wise_fc_layer( # convert to [batch, num_channel, num_outputs] layer = tf.transpose(a=layer, perm=[0, 2, 1]) layer = core.add_residual( - input=tf.transpose(a=input, perm=[0, 2, 1]), residual=layer + input=tf.transpose(a=input, perm=[0, 2, 1]), + residual=layer, + seed=seed, ) # convert back to [batch, num_outputs, num_channel] layer = tf.transpose(a=layer, perm=[0, 2, 1]) @@ -815,6 +852,7 @@ def new_fc_layers( weights_list=None, biases_list=None, max_out_size_list=None, + seed=None, verbose=True, ): """ @@ -869,6 +907,9 @@ def new_fc_layers( If None, no max_out is used in the corresponding layer. If only a single max_out_size is given, it will be used for all layers. + seed : None or int, optional + The seed to be used for random initialisation of weights. + verbose : bool, optional If true, more verbose output is printed. @@ -919,6 +960,9 @@ def new_fc_layers( "Input dimension is wrong: {}".format(input.get_shape().as_list()) ) + # create seed counter + cnt = SeedCounter(seed) + # create layers: layers = [] weights = [] @@ -940,6 +984,7 @@ def new_fc_layers( weights=weights_list[i], biases=biases_list[i], max_out_size=max_out_size_list[i], + seed=cnt(), ) if verbose: print("fc_layer_{:03d}".format(i), layer_i) @@ -974,6 +1019,7 @@ def new_conv_nd_layers( hex_num_rotations_list=1, hex_azimuth_list=None, hex_zero_out_list=False, + seed=None, name="conv_{}d_layer", verbose=True, ): @@ -1128,6 +1174,9 @@ def new_conv_nd_layers( set to zero. If only one boolean is given, it will apply to all layers. + seed : None or int, optional + The seed to be used for random initialisation of weights. + name : str, optional An optional name for the layers. 
@@ -1276,6 +1325,9 @@ def new_conv_nd_layers( hex_zero_out_list = [hex_zero_out_list for i in range(num_layers)] # --------------- + # create seed counter + cnt = SeedCounter(seed) + # create layers: layers = [] weights = [] @@ -1309,6 +1361,7 @@ def new_conv_nd_layers( hex_num_rotations=hex_num_rotations_list[i], hex_azimuth=hex_azimuth_list[i], hex_zero_out=hex_zero_out_list[i], + seed=cnt(), ) if verbose: print("{}_{:02d}".format(name, i), layer_i) diff --git a/tfscripts/compat/v1/weights.py b/tfscripts/compat/v1/weights.py index f7f7c51..b7ed279 100644 --- a/tfscripts/compat/v1/weights.py +++ b/tfscripts/compat/v1/weights.py @@ -10,10 +10,11 @@ import tensorflow as tf # constants +from tfscripts.utils import SeedCounter from tfscripts.compat.v1 import FLOAT_PRECISION -def new_weights(shape, stddev=1.0, name="weights"): +def new_weights(shape, stddev=1.0, name="weights", seed=None): """Helper-function to create new weights Parameters @@ -25,6 +26,8 @@ def new_weights(shape, stddev=1.0, name="weights"): std. deviation. name : str, optional The name of the tensor. + seed : int, optional + Seed for the random number generator. Returns ------- @@ -33,14 +36,17 @@ def new_weights(shape, stddev=1.0, name="weights"): """ return tf.Variable( tf.random.truncated_normal( - shape, stddev=stddev, dtype=FLOAT_PRECISION + shape, + stddev=stddev, + dtype=FLOAT_PRECISION, + seed=seed, ), name=name, dtype=FLOAT_PRECISION, ) -def new_kernel_weights(shape, stddev=0.01, name="weights"): +def new_kernel_weights(shape, stddev=0.01, name="weights", seed=None): """ Get weights for a convolutional kernel. The weights will be initialised, so that convolution performs matrix multiplication over a single pixel. @@ -63,6 +69,7 @@ def new_kernel_weights(shape, stddev=0.01, name="weights"): A tensor with the weights. 
""" + rng = np.random.RandomState(seed) weight_initialisation = np.zeros(shape) spatial_shape = shape[:-2] middle_index = [ @@ -75,22 +82,25 @@ def new_kernel_weights(shape, stddev=0.01, name="weights"): weight_initialisation[middle_index] = 1.0 / np.sqrt(shape[-2]) # add random noise to break symmetry - weight_initialisation += np.random.normal( - size=shape, loc=0.0, scale=stddev - ) + weight_initialisation += rng.normal(size=shape, loc=0.0, scale=stddev) return tf.Variable(weight_initialisation, name=name, dtype=FLOAT_PRECISION) -def new_biases(length, stddev=1.0, name="biases"): +def new_biases(length, stddev=1.0, name="biases", seed=None): """Get new biases. Parameters ---------- length : int Number of biases to get. + stddev : float, optional + The initial values are sampled from a truncated gaussian with this + std. deviation. name : str, optional The name of the tensor. + seed : int, optional + Seed for the random number generator. Returns ------- @@ -99,19 +109,22 @@ def new_biases(length, stddev=1.0, name="biases"): """ return tf.Variable( tf.random.truncated_normal( - shape=[length], stddev=stddev, dtype=FLOAT_PRECISION + shape=[length], + stddev=stddev, + dtype=FLOAT_PRECISION, + seed=seed, ), name=name, dtype=FLOAT_PRECISION, ) - # return tf.Variable(tf.random_normal(shape=[length], - # stddev=2.0/length, - # dtype=FLOAT_PRECISION), - # name=name, dtype=FLOAT_PRECISION) def create_conv_nd_layers_weights( - num_input_channels, filter_size_list, num_filters_list, name="conv_{}d" + num_input_channels, + filter_size_list, + num_filters_list, + seed=None, + name="conv_{}d", ): """Create weights and biases for conv 3d layers @@ -134,6 +147,8 @@ def create_conv_nd_layers_weights( num_filters_list : list of int A list of int where each int denotes the number of filters in that layer. + seed : int, optional + Seed for the random number generator. name : str, optional Name of weights and biases. 
@@ -142,6 +157,8 @@ def create_conv_nd_layers_weights( list of tf.Tensor, list of tf.Tensor Returns the list of weight and bias tensors for each layer """ + # create seed counter + cnt = SeedCounter(seed) num_dims = len(filter_size_list[0]) name = name.format(num_dims) @@ -160,9 +177,12 @@ def create_conv_nd_layers_weights( weight_name = "weights_{}_{:03d}".format(name, i) bias_name = "biases_{}_{:03d}".format(name, i) - # weights_list.append(new_kernel_weights(shape=shape, name=weight_name)) - weights_list.append(new_weights(shape=shape, name=weight_name)) - biases_list.append(new_biases(length=num_filters, name=bias_name)) + weights_list.append( + new_weights(shape=shape, name=weight_name, seed=cnt()) + ) + biases_list.append( + new_biases(length=num_filters, name=bias_name, seed=cnt()) + ) # update number of input channels for next layer num_input_channels = num_filters @@ -171,7 +191,11 @@ def create_conv_nd_layers_weights( def create_fc_layers_weights( - num_inputs, fc_sizes, max_out_size_list=None, name="fc" + num_inputs, + fc_sizes, + max_out_size_list=None, + seed=None, + name="fc", ): """ Create weights and biases for @@ -186,6 +210,8 @@ def create_fc_layers_weights( max_out_size_list : None, optional If a list of int is given, it is interpreted as the maxout size for each layer. + seed : int, optional + Seed for the random number generator. name : str, optional Name of weights and biases. 
@@ -194,6 +220,9 @@ def create_fc_layers_weights( list of tf.Tensor, list of tf.Tensor Returns the list of weight and bias tensors for each layer """ + # create seed counter + cnt = SeedCounter(seed) + # create max out array if max_out_size_list is None: max_out_size_list = [None for i in range(len(fc_sizes))] @@ -208,9 +237,13 @@ def create_fc_layers_weights( bias_name = "biases_{}_{:03d}".format(name, i) weights_list.append( - new_weights(shape=[num_inputs, num_outputs], name=weight_name) + new_weights( + shape=[num_inputs, num_outputs], name=weight_name, seed=cnt() + ) + ) + biases_list.append( + new_biases(length=num_outputs, name=bias_name, seed=cnt()) ) - biases_list.append(new_biases(length=num_outputs, name=bias_name)) if max_out_size is None: num_inputs = num_outputs diff --git a/tfscripts/conv.py b/tfscripts/conv.py index 1e685de..d46d677 100644 --- a/tfscripts/conv.py +++ b/tfscripts/conv.py @@ -247,6 +247,7 @@ def __init__( padding="SAME", dilation_rate=None, float_precision=FLOAT_PRECISION, + seed=None, name=None, ): """Initialize object @@ -275,6 +276,8 @@ def __init__( defines dilattion rate to be used float_precision : tf.dtype, optional The tensorflow dtype describing the float precision to use. + seed : int, optional + Seed for the random number generator. name : None, optional The name of the tensorflow module. @@ -335,6 +338,7 @@ def __init__( shape=input_shape[1:] + [num_outputs], shared_axes=[0], float_precision=float_precision, + seed=seed, ) else: @@ -342,6 +346,7 @@ def __init__( shape=kernel_shape, shared_axes=[0], float_precision=float_precision, + seed=seed, ) self.output_shape = output_shape @@ -465,6 +470,7 @@ def __init__( padding="SAME", dilation_rate=None, float_precision=FLOAT_PRECISION, + seed=None, name=None, ): """Initialize object @@ -493,6 +499,8 @@ def __init__( defines dilattion rate to be used float_precision : tf.dtype, optional The tensorflow dtype describing the float precision to use. 
+ seed : int, optional + Seed for the random number generator. name : None, optional The name of the tensorflow module. @@ -553,6 +561,7 @@ def __init__( shape=input_shape[1:] + [num_outputs], shared_axes=[0, 1], float_precision=float_precision, + seed=seed, ) else: @@ -560,6 +569,7 @@ def __init__( shape=kernel_shape, shared_axes=[0], float_precision=float_precision, + seed=seed, ) self.output_shape = output_shape @@ -708,6 +718,7 @@ def __init__( padding="SAME", dilation_rate=None, float_precision=FLOAT_PRECISION, + seed=None, name=None, ): """Initialize object @@ -733,6 +744,8 @@ def __init__( defines dilattion rate to be used float_precision : tf.dtype, optional The tensorflow dtype describing the float precision to use. + seed : int, optional + Seed for the random number generator. name : None, optional The name of the tensorflow module. """ @@ -788,6 +801,7 @@ def __init__( shape=input_shape[1:] + [num_outputs], shared_axes=[0, 1, 2], float_precision=float_precision, + seed=seed, ) else: @@ -795,6 +809,7 @@ def __init__( shape=kernel_shape, shared_axes=[0], float_precision=float_precision, + seed=seed, ) self.output_shape = output_shape diff --git a/tfscripts/core.py b/tfscripts/core.py index 9a2bd78..f02e150 100644 --- a/tfscripts/core.py +++ b/tfscripts/core.py @@ -31,6 +31,7 @@ def __init__( use_scale_factor=True, scale_factor=0.001, float_precision=FLOAT_PRECISION, + seed=None, name=None, ): """Initialize object @@ -49,6 +50,8 @@ def __init__( use_scale_factor is True. float_precision : tf.dtype, optional The tensorflow dtype describing the float precision to use. + seed : int, optional + Seed for the random number generator. name : None, optional The name of the tensorflow module. 
""" @@ -66,6 +69,7 @@ def __init__( [self.num_outputs], stddev=self.scale_factor, float_precision=float_precision, + seed=seed, ) def __call__(self, input, residual): @@ -160,6 +164,7 @@ def __init__( input_shape=None, use_batch_normalisation=False, float_precision=FLOAT_PRECISION, + seed=None, name=None, ): """Initialize object @@ -174,6 +179,8 @@ def __init__( True: use batch normalisation float_precision : tf.dtype, optional The tensorflow dtype describing the float precision to use. + seed : int, optional + Seed for the random number generator. name : None, optional The name of the tensorflow module. """ @@ -192,23 +199,33 @@ def __init__( if activation_type == "prelu": self.slope_weight = new_weights( - input_shape[1:], float_precision=float_precision + input_shape[1:], + float_precision=float_precision, + seed=seed, ) elif activation_type == "pelu": self.a_weight = new_weights( - input_shape[1:], float_precision=float_precision + input_shape[1:], + float_precision=float_precision, + seed=seed, ) self.b_weight = new_weights( - input_shape[1:], float_precision=float_precision + input_shape[1:], + float_precision=float_precision, + seed=seed + 1, ) elif activation_type == "pgaussian": self.sigma_weight = new_weights( - input_shape[1:], float_precision=float_precision + input_shape[1:], + float_precision=float_precision, + seed=seed, ) self.mu = new_weights( - input_shape[1:], float_precision=float_precision + input_shape[1:], + float_precision=float_precision, + seed=seed + 1, ) def __call__(self, layer, is_training=None): diff --git a/tfscripts/hex/conv.py b/tfscripts/hex/conv.py index 0de3a3b..d4ba4fc 100644 --- a/tfscripts/hex/conv.py +++ b/tfscripts/hex/conv.py @@ -13,6 +13,7 @@ import tensorflow as tf # tfscripts specific imports +from tfscripts.utils import SeedCounter from tfscripts.weights import new_weights, new_biases from tfscripts.hex.visual import print_hex_data from tfscripts.hex import rotation @@ -77,6 +78,7 @@ def get_hex_kernel( 
print_kernel=False, get_ones=False, float_precision=FLOAT_PRECISION, + seed=None, ): """Get hexagonal convolution kernel @@ -137,6 +139,8 @@ def get_hex_kernel( hexagon. float_precision : tf.dtype, optional The tensorflow dtype describing the float precision to use. + seed : int, optional + Seed for the random number generator. Returns ------- @@ -154,6 +158,9 @@ def get_hex_kernel( ValueError Description """ + # create seed counter + cnt = SeedCounter(seed) + k = filter_size[0] x = filter_size[1] @@ -189,7 +196,9 @@ def get_hex_kernel( weights = ones else: weights = new_weights( - filter_size[2:], float_precision=float_precision + filter_size[2:], + float_precision=float_precision, + seed=cnt(), ) var_list.append(weights) test_hex_dict[(a, b)] = 1 @@ -231,7 +240,9 @@ def get_hex_kernel( weights = ones else: weights = new_weights( - filter_size[2:], float_precision=float_precision + filter_size[2:], + float_precision=float_precision, + seed=cnt(), ) var_list.append(weights) test_hex_dict[(a, b)] = 1 @@ -264,6 +275,7 @@ def __init__( var_list=None, azimuth=None, float_precision=FLOAT_PRECISION, + seed=None, name=None, ): """Initialize object @@ -340,6 +352,8 @@ def __init__( [given in degrees] in counterclockwise direction float_precision : tf.dtype, optional The tensorflow dtype describing the float precision to use. + seed : int, optional + Seed for the random number generator. name : None, optional The name of the tensorflow module. 
@@ -362,6 +376,7 @@ def __init__( filter_size + [num_channels, num_filters], azimuth, float_precision=float_precision, + seed=seed, ) else: if num_rotations > 1: @@ -369,11 +384,13 @@ def __init__( filter_size + [num_channels, num_filters], num_rotations, float_precision=float_precision, + seed=seed, ) else: kernel, var_list = get_hex_kernel( filter_size + [num_channels, num_filters], float_precision=float_precision, + seed=seed, ) self.num_filters = num_filters @@ -385,11 +402,12 @@ def __init__( self.zero_out = zero_out self.azimuth = azimuth self.float_precision = float_precision + self.seed = seed self.kernel = kernel self.kernel_var_list = var_list def __call__(self, inputs): - """Apply Activation Module. + """Apply ConvHex Module. Parameters ---------- @@ -435,6 +453,7 @@ def __call__(self, inputs): result.get_shape().as_list()[3:], get_ones=True, float_precision=self.float_precision, + seed=self.seed, ) result = result * zero_out_matrix @@ -453,6 +472,7 @@ def __call__(self, inputs): ], get_ones=True, float_precision=self.float_precision, + seed=self.seed, ) # Make sure there were no extra variables created. @@ -491,6 +511,7 @@ def __init__( stack_axis=None, zero_out=False, float_precision=FLOAT_PRECISION, + seed=None, name=None, ): """Initialize object @@ -573,6 +594,8 @@ def __init__( set to zero. float_precision : tf.dtype, optional The tensorflow dtype describing the float precision to use. + seed : int, optional + Seed for the random number generator. name : None, optional The name of the tensorflow module. 
@@ -595,6 +618,7 @@ def __init__( filter_size + [num_channels, num_filters], azimuth, float_precision=float_precision, + seed=seed, ) else: if num_rotations > 1: @@ -602,11 +626,13 @@ def __init__( filter_size + [num_channels, num_filters], num_rotations, float_precision=float_precision, + seed=seed, ) else: kernel, var_list = get_hex_kernel( filter_size + [num_channels, num_filters], float_precision=float_precision, + seed=seed, ) self.num_filters = num_filters @@ -619,11 +645,12 @@ def __init__( self.stack_axis = stack_axis self.zero_out = zero_out self.float_precision = float_precision + self.seed = seed self.kernel = kernel self.kernel_var_list = var_list def __call__(self, inputs): - """Apply Activation Module. + """Apply ConvHex4d Module. Parameters ---------- @@ -662,6 +689,7 @@ def __call__(self, inputs): ], get_ones=True, float_precision=self.float_precision, + seed=self.seed, ) # Make sure there were no extra variables created. @@ -689,6 +717,7 @@ def create_conv_hex_layers_weights( num_rotations_list=1, azimuth_list=None, float_precision=FLOAT_PRECISION, + seed=None, ): """Create weights and biases for conv hex n-dimensional layers with n >= 2 @@ -746,6 +775,8 @@ def create_conv_hex_layers_weights( If azimuth is None, the hexagonal kernel is not rotated. float_precision : tf.dtype, optional The tensorflow dtype describing the float precision to use. + seed : int, optional + Seed for the random number generator. 
Returns ------- @@ -756,6 +787,8 @@ def create_conv_hex_layers_weights( list of tf.Variable A list of tensorflow variables created in this function """ + # create seed counter + cnt = SeedCounter(seed) # create num_rotations_list if isinstance(num_rotations_list, int): @@ -777,7 +810,10 @@ def create_conv_hex_layers_weights( ): if azimuth is not None: kernel, var_list = rotation.get_dynamic_rotation_hex_kernel( - filter_size, azimuth, float_precision=float_precision + filter_size, + azimuth, + float_precision=float_precision, + seed=cnt(), ) else: if num_rotations > 1: @@ -785,11 +821,13 @@ def create_conv_hex_layers_weights( filter_size + [num_input_channels, num_filters], num_rotations, float_precision=float_precision, + seed=cnt(), ) else: kernel, var_list = get_hex_kernel( filter_size + [num_input_channels, num_filters], float_precision=float_precision, + seed=cnt(), ) variable_list.extend(var_list) @@ -798,6 +836,7 @@ def create_conv_hex_layers_weights( new_biases( length=num_filters * num_rotations, float_precision=float_precision, + seed=cnt(), ) ) num_input_channels = num_filters diff --git a/tfscripts/hex/icecube.py b/tfscripts/hex/icecube.py index d6ab4a4..2dbcac2 100644 --- a/tfscripts/hex/icecube.py +++ b/tfscripts/hex/icecube.py @@ -7,6 +7,7 @@ import tensorflow as tf # tfscripts specific imports +from tfscripts.utils import SeedCounter from tfscripts.weights import new_weights # constants @@ -238,7 +239,9 @@ def get_icecube_string_from_hex_coord(a, b): return hex_string_coord_dict[(a, b)] -def get_icecube_kernel(shape, get_ones=False, float_precision=FLOAT_PRECISION): +def get_icecube_kernel( + shape, get_ones=False, float_precision=FLOAT_PRECISION, seed=None +): """ Get a kernel of shape 'shape' for IceCube where coordinates of no real strings are set to constant zeros. @@ -247,13 +250,14 @@ def get_icecube_kernel(shape, get_ones=False, float_precision=FLOAT_PRECISION): ---------- shape : list of int The shape of the desired kernel. 
- get_ones : bool, optional If True, returns constant ones for real DOMs, zeros for virtual DOMs. If False, return trainable parameter for real DOMs, zeros for virtual DOMs float_precision : tf.dtype, optional The tensorflow dtype describing the float precision to use. + seed : int, optional + Seed for the random number generator. Returns ------- @@ -262,6 +266,9 @@ def get_icecube_kernel(shape, get_ones=False, float_precision=FLOAT_PRECISION): list of tf.Variable A list of tensorflow variables created in this function """ + # create seed counter + cnt = SeedCounter(seed) + zeros = tf.zeros(shape, dtype=float_precision) ones = tf.ones(shape, dtype=float_precision) @@ -278,7 +285,9 @@ def get_icecube_kernel(shape, get_ones=False, float_precision=FLOAT_PRECISION): weights = ones else: weights = new_weights( - shape, float_precision=float_precision + shape, + float_precision=float_precision, + seed=cnt(), ) var_list.append(weights) else: diff --git a/tfscripts/hex/rotation.py b/tfscripts/hex/rotation.py index 92f0b85..5bd2015 100644 --- a/tfscripts/hex/rotation.py +++ b/tfscripts/hex/rotation.py @@ -12,6 +12,8 @@ import numpy as np import tensorflow as tf +from tfscripts.utils import SeedCounter + # tfscripts specific imports from tfscripts.weights import new_weights @@ -96,7 +98,10 @@ def tf_get_rotated_corner_weights(corner_weights, azimuth): def get_dynamic_rotation_hex_kernel( - filter_size, azimuth, float_precision=FLOAT_PRECISION + filter_size, + azimuth, + float_precision=FLOAT_PRECISION, + seed=None, ): """Dynamically azimuthally rotated hexagonal kernels. @@ -135,6 +140,8 @@ def get_dynamic_rotation_hex_kernel( be dynamically rotated. Azimuth angle is given in degrees. float_precision : tf.dtype, optional The tensorflow dtype describing the float precision to use. + seed : int, optional + Seed for the random number generator. 
Returns ------- @@ -151,6 +158,9 @@ def get_dynamic_rotation_hex_kernel( Description """ + # create seed counter + cnt = SeedCounter(seed) + var_list = [] no_of_dims = len(filter_size) @@ -158,7 +168,9 @@ def get_dynamic_rotation_hex_kernel( [tf.shape(input=azimuth)[0]] + filter_size[2:], dtype=float_precision ) center_weight = new_weights( - [1] + filter_size[2:], float_precision=float_precision + [1] + filter_size[2:], + float_precision=float_precision, + seed=cnt(), ) var_list.append(center_weight) multiples = [tf.shape(input=azimuth)[0]] + [1] * (no_of_dims - 2) @@ -168,19 +180,25 @@ def get_dynamic_rotation_hex_kernel( if filter_size[0:2] == [2, 0]: # hexagonal 2,0 Filter corner_weights1 = new_weights( - [6] + filter_size[2:], float_precision=float_precision + [6] + filter_size[2:], + float_precision=float_precision, + seed=cnt(), ) var_list.append(corner_weights1) elif filter_size[0:2] == [2, 1]: # hexagonal 2,1 Filter corner_weights1 = new_weights( - [6] + filter_size[2:], float_precision=float_precision + [6] + filter_size[2:], + float_precision=float_precision, + seed=cnt(), ) var_list.append(corner_weights1) corner_weights2 = [] for i in range(6): weights = new_weights( - filter_size[2:], float_precision=float_precision + filter_size[2:], + float_precision=float_precision, + seed=cnt(), ) var_list.append(weights) corner_weights2.extend([Z, weights]) @@ -188,27 +206,37 @@ def get_dynamic_rotation_hex_kernel( elif filter_size[0:2] == [3, 0]: # hexagonal 3,0 Filter corner_weights1 = new_weights( - [6] + filter_size[2:], float_precision=float_precision + [6] + filter_size[2:], + float_precision=float_precision, + seed=cnt(), ) var_list.append(corner_weights1) corner_weights2 = new_weights( - [12] + filter_size[2:], float_precision=float_precision + [12] + filter_size[2:], + float_precision=float_precision, + seed=cnt(), ) var_list.append(corner_weights2) elif filter_size[0:2] == [3, 1]: # hexagonal 3,1 Filter corner_weights1 = new_weights( - [6] + 
filter_size[2:], float_precision=float_precision + [6] + filter_size[2:], + float_precision=float_precision, + seed=cnt(), ) var_list.append(corner_weights1) corner_weights2 = new_weights( - [12] + filter_size[2:], float_precision=float_precision + [12] + filter_size[2:], + float_precision=float_precision, + seed=cnt(), ) var_list.append(corner_weights2) corner_weights3 = [] for i in range(6): weights = new_weights( - filter_size[2:], float_precision=float_precision + filter_size[2:], + float_precision=float_precision, + seed=cnt(), ) var_list.append(weights) corner_weights3.extend([Z, weights, Z]) @@ -216,17 +244,23 @@ def get_dynamic_rotation_hex_kernel( elif filter_size[0:2] == [3, 2]: # hexagonal 3,2 Filter corner_weights1 = new_weights( - [6] + filter_size[2:], float_precision=float_precision + [6] + filter_size[2:], + float_precision=float_precision, + seed=cnt(), ) var_list.append(corner_weights1) corner_weights2 = new_weights( - [12] + filter_size[2:], float_precision=float_precision + [12] + filter_size[2:], + float_precision=float_precision, + seed=cnt(), ) var_list.append(corner_weights2) corner_weights3 = [] for i in range(6): weights = new_weights( - filter_size[2:], float_precision=float_precision + filter_size[2:], + float_precision=float_precision, + seed=cnt(), ) var_list.append(weights) corner_weights3.extend([Z, Z, weights]) @@ -234,15 +268,21 @@ def get_dynamic_rotation_hex_kernel( elif filter_size[0:2] == [4, 0]: # hexagonal 4,0 Filter corner_weights1 = new_weights( - [6] + filter_size[2:], float_precision=float_precision + [6] + filter_size[2:], + float_precision=float_precision, + seed=cnt(), ) var_list.append(corner_weights1) corner_weights2 = new_weights( - [12] + filter_size[2:], float_precision=float_precision + [12] + filter_size[2:], + float_precision=float_precision, + seed=cnt(), ) var_list.append(corner_weights2) corner_weights3 = new_weights( - [18] + filter_size[2:], float_precision=float_precision + [18] + filter_size[2:], + 
float_precision=float_precision, + seed=cnt(), ) var_list.append(corner_weights3) else: @@ -324,7 +364,10 @@ def get_dynamic_rotation_hex_kernel( # hexagonal azimuth rotated filters # ------------------------------------------------------------------------- def get_rotated_hex_kernel( - filter_size, num_rotations, float_precision=FLOAT_PRECISION + filter_size, + num_rotations, + float_precision=FLOAT_PRECISION, + seed=None, ): """ Create Weights for a hexagonal kernel. @@ -363,6 +406,8 @@ def get_rotated_hex_kernel( Kernels will be rotated by 360 degrees / num_rotations float_precision : tf.dtype, optional The tensorflow dtype describing the float precision to use. + seed : int, optional + Seed for the random number generator. Returns ------- @@ -380,12 +425,17 @@ def get_rotated_hex_kernel( Description """ + # create seed counter + cnt = SeedCounter(seed) + # define function to get new weights with correct shape var_list = [] def get_new_weights(var_list): weights = new_weights( - filter_size[2:-2], float_precision=float_precision + filter_size[2:-2], + float_precision=float_precision, + seed=cnt(), ) var_list.append(weights) return weights @@ -445,7 +495,9 @@ def get_new_weights(var_list): rotated_kernels = [] in_out_channel_weights = new_weights( - [num_rotations] + filter_size[-2:], float_precision=float_precision + [num_rotations] + filter_size[-2:], + float_precision=float_precision, + seed=cnt(), ) var_list.append(in_out_channel_weights) diff --git a/tfscripts/hex/visual.py b/tfscripts/hex/visual.py index d47386e..b3d712a 100644 --- a/tfscripts/hex/visual.py +++ b/tfscripts/hex/visual.py @@ -164,7 +164,10 @@ def plot_hex2D(hex_grid, file=None, hex_grid_spacing=1.0, norm="linear"): def visualize_rotated_hex_kernel( - filter_size, num_rotations, file="Rotation_{azimuth:2.2f}.png" + filter_size, + num_rotations, + file="Rotation_{azimuth:2.2f}.png", + seed=None, ): """Visualize hexagonal azimuth rotated filters @@ -189,62 +192,57 @@ def 
visualize_rotated_hex_kernel( num_rotations : int. number of rotational kernels to create. Kernels will be rotated by 360 degrees / num_rotations - - file : str, optional A file pattern to which the plots of the rotated kernels will be saved to. The file pattern is formatted with a keyword 'azimuth' which holds the current azimuth rotation. + seed : int, optional + Seed for random number generator. Raises ------ ValueError Description """ + rng = np.random.RandomState(seed) azimuths = np.linspace(0, 360, num_rotations + 1)[:-1] Z = 0 - center_weight = np.random.uniform(1, high=15, size=1) + center_weight = rng.uniform(1, high=15, size=1) # HARDCODE MAGIC... ToDo: Generalize if filter_size[0:2] == [2, 0]: # hexagonal 2,0 Filter - corner_weights1 = np.random.uniform(1, high=15, size=6) + corner_weights1 = rng.uniform(1, high=15, size=6) elif filter_size[0:2] == [2, 1]: # hexagonal 2,1 Filter - corner_weights1 = np.random.uniform(1, high=15, size=6) + corner_weights1 = rng.uniform(1, high=15, size=6) corner_weights2 = [] for i in range(6): - corner_weights2.extend( - [Z, np.random.uniform(1, high=15, size=1)[0]] - ) + corner_weights2.extend([Z, rng.uniform(1, high=15, size=1)[0]]) elif filter_size[0:2] == [3, 0]: # hexagonal 3,0 Filter - corner_weights1 = np.random.uniform(1, high=15, size=6) - corner_weights2 = np.random.uniform(1, high=15, size=12) + corner_weights1 = rng.uniform(1, high=15, size=6) + corner_weights2 = rng.uniform(1, high=15, size=12) elif filter_size[0:2] == [3, 1]: # hexagonal 3,1 Filter - corner_weights1 = np.random.uniform(1, high=15, size=6) - corner_weights2 = np.random.uniform(1, high=15, size=12) + corner_weights1 = rng.uniform(1, high=15, size=6) + corner_weights2 = rng.uniform(1, high=15, size=12) corner_weights3 = [] for i in range(6): - corner_weights3.extend( - [Z, np.random.uniform(1, high=15, size=1)[0], Z] - ) + corner_weights3.extend([Z, rng.uniform(1, high=15, size=1)[0], Z]) elif filter_size[0:2] == [3, 2]: # hexagonal 3,2 Filter 
- corner_weights1 = np.random.uniform(1, high=15, size=6) - corner_weights2 = np.random.uniform(1, high=15, size=12) + corner_weights1 = rng.uniform(1, high=15, size=6) + corner_weights2 = rng.uniform(1, high=15, size=12) corner_weights3 = [] for i in range(6): - corner_weights3.extend( - [Z, Z, np.random.uniform(1, high=15, size=1)[0]] - ) + corner_weights3.extend([Z, Z, rng.uniform(1, high=15, size=1)[0]]) elif filter_size[0:2] == [4, 0]: # hexagonal 4,0 Filter - corner_weights1 = np.random.uniform(1, high=15, size=6) - corner_weights2 = np.random.uniform(1, high=15, size=12) - corner_weights3 = np.random.uniform(1, high=15, size=18) + corner_weights1 = rng.uniform(1, high=15, size=6) + corner_weights2 = rng.uniform(1, high=15, size=12) + corner_weights3 = rng.uniform(1, high=15, size=18) else: raise ValueError( "visualize_rotated_hex_kernel: Unsupported hexagonal " diff --git a/tfscripts/layers.py b/tfscripts/layers.py index 17789bd..8314df7 100644 --- a/tfscripts/layers.py +++ b/tfscripts/layers.py @@ -8,6 +8,7 @@ import tensorflow as tf # tfscripts specific imports +from tfscripts.utils import SeedCounter from tfscripts.weights import new_weights, new_biases from tfscripts.weights import new_locally_connected_weights from tfscripts import conv @@ -160,6 +161,7 @@ def __init__( hex_azimuth=None, hex_zero_out=False, float_precision=FLOAT_PRECISION, + seed=None, name=None, ): """Initialize object @@ -291,6 +293,8 @@ def __init__( set to zero. float_precision : tf.dtype, optional The tensorflow dtype describing the float precision to use. + seed : None or int, optional + Seed for the random number generator. name : None, optional The name of the tensorflow module. 
@@ -303,6 +307,9 @@ def __init__( """ super(ConvNdLayer, self).__init__(name=name) + # create seed counter + cnt = SeedCounter(seed) + if isinstance(input_shape, tf.TensorShape): input_shape = input_shape.as_list() @@ -362,13 +369,17 @@ def __init__( if method.lower() == "convolution": if weights is None: weights = new_weights( - shape=shape, float_precision=float_precision + shape=shape, + float_precision=float_precision, + seed=cnt(), ) # Create new biases, one for each filter. if biases is None: biases = new_biases( - length=num_filters, float_precision=float_precision + length=num_filters, + float_precision=float_precision, + seed=cnt(), ) if num_dims == 1 or num_dims == 2 or num_dims == 3: @@ -418,6 +429,7 @@ def temp_func(inputs): kernel=weights, var_list=var_list, float_precision=float_precision, + seed=cnt(), ) elif num_dims == 4: self.conv_layer = hx.ConvHex4d( @@ -433,6 +445,7 @@ def temp_func(inputs): kernel=weights, var_list=var_list, float_precision=float_precision, + seed=cnt(), ) # Create new biases, one for each filter. 
@@ -440,6 +453,7 @@ def temp_func(inputs): biases = new_biases( length=num_filters * hex_num_rotations, float_precision=float_precision, + seed=cnt(), ) # ------------------- @@ -457,6 +471,7 @@ def temp_func(inputs): padding=padding, dilation_rate=dilation_rate, float_precision=float_precision, + seed=cnt(), ) elif num_dims == 2: self.conv_layer = conv.LocallyConnected2d( @@ -468,6 +483,7 @@ def temp_func(inputs): padding=padding, dilation_rate=dilation_rate, float_precision=float_precision, + seed=cnt(), ) elif num_dims == 3: self.conv_layer = conv.LocallyConnected3d( @@ -479,6 +495,7 @@ def temp_func(inputs): padding=padding, dilation_rate=dilation_rate, float_precision=float_precision, + seed=cnt(), ) elif num_dims == 4: raise NotImplementedError( @@ -491,6 +508,7 @@ def temp_func(inputs): shape=self.conv_layer.output_shape[1:], shared_axes=[i for i in range(num_dims)], float_precision=float_precision, + seed=cnt(), ) # ------------------- @@ -548,7 +566,9 @@ def temp_func(inputs): if biases is None: biases = new_biases( - length=num_filters, float_precision=float_precision + length=num_filters, + float_precision=float_precision, + seed=cnt(), ) else: @@ -567,6 +587,7 @@ def temp_func(inputs): input_shape=conv_layer_output.shape, use_batch_normalisation=use_batch_normalisation, float_precision=float_precision, + seed=cnt(), ) # assign and keep track of settings @@ -595,6 +616,7 @@ def temp_func(inputs): residual_shape=self.output_shape, strides=strides, float_precision=float_precision, + seed=cnt(), ) def __call__(self, inputs, is_training, keep_prob=None): @@ -777,6 +799,7 @@ def __init__( max_out_size=None, repair_std_deviation=True, float_precision=FLOAT_PRECISION, + seed=None, name=None, ): """Initialize object @@ -814,6 +837,8 @@ def __init__( a std deviation of 1. float_precision : tf.dtype, optional The tensorflow dtype describing the float precision to use. + seed : None or int, optional + Seed for the random number generator. 
name : None, optional The name of the tensorflow module. @@ -830,10 +855,13 @@ def __init__( weights = new_weights( shape=[num_inputs, num_outputs], float_precision=float_precision, + seed=seed, ) if biases is None: biases = new_biases( - length=num_outputs, float_precision=float_precision + length=num_outputs, + float_precision=float_precision, + seed=seed, ) self.biases = biases @@ -844,6 +872,7 @@ def __init__( input_shape=[None, num_outputs], use_batch_normalisation=use_batch_normalisation, float_precision=float_precision, + seed=seed, ) # calculate residual strides @@ -872,6 +901,7 @@ def __init__( residual_shape=output_shape, strides=res_strides, float_precision=float_precision, + seed=seed, ) self.output_shape = output_shape @@ -974,6 +1004,7 @@ def __init__( max_out_size=None, repair_std_deviation=True, float_precision=FLOAT_PRECISION, + seed=None, name=None, ): """Initialize object @@ -1011,6 +1042,8 @@ def __init__( a std deviation of 1. float_precision : tf.dtype, optional The tensorflow dtype describing the float precision to use. + seed : None or int, optional + Seed for the random number generator. name : None, optional The name of the tensorflow module. 
@@ -1033,11 +1066,13 @@ def __init__( weights = new_weights( shape=[num_channels, num_inputs, num_outputs], float_precision=float_precision, + seed=seed, ) if biases is None: biases = new_weights( shape=[num_outputs, num_channels], float_precision=float_precision, + seed=seed, ) self.biases = biases @@ -1048,6 +1083,7 @@ def __init__( input_shape=transpose_shape, use_batch_normalisation=use_batch_normalisation, float_precision=float_precision, + seed=seed, ) # # calculate residual strides @@ -1071,6 +1107,7 @@ def __init__( residual_shape=[None, num_channels, num_outputs], strides=res_strides, float_precision=float_precision, + seed=seed, ) # calculate output shape @@ -1189,6 +1226,7 @@ def __init__( max_out_size_list=None, repair_std_deviation_list=True, float_precision=FLOAT_PRECISION, + seed=None, name="fc_layer", verbose=False, ): @@ -1238,6 +1276,8 @@ def __init__( layers. float_precision : tf.dtype, optional The tensorflow dtype describing the float precision to use. + seed : None or int, optional + Seed for the random number generator. name : str, optional An optional name for the layers. @@ -1296,6 +1336,9 @@ def __init__( "Input dimension is wrong: {}".format(input_shape) ) + # create seed counter + cnt = SeedCounter(seed) + # create layers: self.layers = [] for i in range(num_layers): @@ -1315,6 +1358,7 @@ def __init__( max_out_size=max_out_size_list[i], repair_std_deviation=repair_std_deviation_list[i], float_precision=float_precision, + seed=cnt(), name="{}_{:03d}".format(name, i), ) if verbose: @@ -1378,6 +1422,7 @@ def __init__( hex_azimuth_list=None, hex_zero_out_list=False, float_precision=FLOAT_PRECISION, + seed=None, name="conv_{}d_layer", verbose=False, ): @@ -1520,7 +1565,9 @@ def __init__( set to zero. If only one boolean is given, it will apply to all layers. float_precision : TYPE, optional - Description + The tensorflow dtype describing the float precision to use. + seed : None or int, optional + Seed for the random number generator. 
name : str, optional An optional name for the layers. verbose : bool, optional @@ -1531,6 +1578,9 @@ def __init__( ValueError Description """ + # create seed counter + cnt = SeedCounter(seed) + num_dims = len(input_shape) name = name.format(num_dims - 2) @@ -1718,6 +1768,7 @@ def __init__( hex_azimuth=hex_azimuth_list[i], hex_zero_out=hex_zero_out_list[i], float_precision=float_precision, + seed=cnt(), name="{}_{:03d}".format(name, i), ) if verbose: diff --git a/tfscripts/model.py b/tfscripts/model.py index a5511a8..26c429d 100644 --- a/tfscripts/model.py +++ b/tfscripts/model.py @@ -17,6 +17,7 @@ def __init__( use_batch_normalisation_list=False, use_residual_list=False, dtype="float32", + seed=None, verbose=False, ): """Dense NN Model @@ -46,6 +47,8 @@ def __init__( layers. dtype : str, optional The float precision type. + seed : int, optional + Seed for the random number generator. verbose : bool, optional If True, print additional information during setup. """ @@ -57,6 +60,7 @@ def __init__( self.activation_list = activation_list self.use_batch_normalisation_list = use_batch_normalisation_list self.use_residual_list = use_residual_list + self.seed = seed tf_dtype = getattr(tf, dtype) @@ -105,6 +109,7 @@ def __init__( biases_list=None, max_out_size_list=None, float_precision=tf_dtype, + seed=seed, name="fc_layer", verbose=verbose, ) @@ -251,6 +256,7 @@ def get_config(self): "use_batch_normalisation_list": self.use_batch_normalisation_list, "use_residual_list": self.use_residual_list, "dtype": self.dtype, + "seed": self.seed, } for key, value in config.items(): if isinstance(value, (list, tuple)): @@ -270,6 +276,7 @@ def __init__( use_residual_list_unc=False, use_nth_fc_layer_as_input=None, min_sigma_value=1e-3, + seed=None, verbose=False, **kwargs ): @@ -308,10 +315,12 @@ def __init__( min_sigma_value : float The lower bound for the uncertainty estimation. This is used to ensure robustness of the training. 
+ seed : int, optional + Seed for the random number generator. **kwargs Keyword arguments that are passed on to DenseNN initializer. """ - super().__init__(verbose=verbose, **kwargs) + super().__init__(seed=seed, verbose=verbose, **kwargs) self.fc_sizes_unc = fc_sizes_unc self.use_dropout_list_unc = use_dropout_list_unc @@ -343,6 +352,7 @@ def __init__( biases_list=None, max_out_size_list=None, float_precision=getattr(tf, self.dtype), + seed=seed, name="fc_layer_unc", verbose=verbose, ) diff --git a/tfscripts/utils.py b/tfscripts/utils.py index 6c858c8..d27c145 100644 --- a/tfscripts/utils.py +++ b/tfscripts/utils.py @@ -11,6 +11,20 @@ from tfscripts import FLOAT_PRECISION +class SeedCounter(object): + """A simple seed counter class for generating seeds.""" + + def __init__(self, seed=0): + self._seed_state = seed + + def __call__(self): + if self._seed_state is None: + return None + else: + self._seed_state += 1 + return self._seed_state + + def count_parameters(var_list=None): """Count number of trainable parameters diff --git a/tfscripts/weights.py b/tfscripts/weights.py index 9ead6af..73c5625 100644 --- a/tfscripts/weights.py +++ b/tfscripts/weights.py @@ -10,11 +10,16 @@ import tensorflow as tf # constants +from tfscripts.utils import SeedCounter from tfscripts import FLOAT_PRECISION def new_weights( - shape, stddev=1.0, name="weights", float_precision=FLOAT_PRECISION + shape, + stddev=1.0, + name="weights", + float_precision=FLOAT_PRECISION, + seed=None, ): """Helper-function to create new weights @@ -29,6 +34,8 @@ def new_weights( The name of the tensor. float_precision : tf.dtype, optional The tensorflow dtype describing the float precision to use. + seed : int, optional + Seed for the random number generator. 
Returns ------- @@ -37,7 +44,10 @@ def new_weights( """ return tf.Variable( tf.random.truncated_normal( - shape, stddev=stddev, dtype=float_precision + shape, + stddev=stddev, + dtype=float_precision, + seed=seed, ), name=name, dtype=float_precision, @@ -50,6 +60,7 @@ def new_locally_connected_weights( name="weights", shared_axes=None, float_precision=FLOAT_PRECISION, + seed=None, ): """Helper-function to create new weights @@ -66,6 +77,8 @@ def new_locally_connected_weights( A list of axes over which the same initial values will be chosen. float_precision : tf.dtype, optional The tensorflow dtype describing the float precision to use. + seed : int, optional + Seed for the random number generator. Returns ------- @@ -87,7 +100,10 @@ def new_locally_connected_weights( # sample initial values initial_value = tf.random.truncated_normal( - shape_init, stddev=stddev, dtype=float_precision + shape_init, + stddev=stddev, + dtype=float_precision, + seed=seed, ) # tile over shared axes @@ -97,7 +113,11 @@ def new_locally_connected_weights( def new_kernel_weights( - shape, stddev=0.01, name="weights", float_precision=FLOAT_PRECISION + shape, + stddev=0.01, + name="weights", + float_precision=FLOAT_PRECISION, + seed=None, ): """ Get weights for a convolutional kernel. The weights will be initialised, @@ -116,6 +136,8 @@ def new_kernel_weights( The name of the tensor. float_precision : tf.dtype, optional The tensorflow dtype describing the float precision to use. + seed : int, optional + Seed for the random number generator. Returns ------- @@ -123,6 +145,7 @@ def new_kernel_weights( A tensor with the weights. 
""" + rng = np.random.RandomState(seed) weight_initialisation = np.zeros(shape) spatial_shape = shape[:-2] middle_index = [ @@ -135,15 +158,17 @@ def new_kernel_weights( weight_initialisation[middle_index] = 1.0 / np.sqrt(shape[-2]) # add random noise to break symmetry - weight_initialisation += np.random.normal( - size=shape, loc=0.0, scale=stddev - ) + weight_initialisation += rng.normal(size=shape, loc=0.0, scale=stddev) return tf.Variable(weight_initialisation, name=name, dtype=float_precision) def new_biases( - length, stddev=1.0, name="biases", float_precision=FLOAT_PRECISION + length, + stddev=1.0, + name="biases", + float_precision=FLOAT_PRECISION, + seed=None, ): """Get new biases. @@ -158,6 +183,8 @@ def new_biases( The name of the tensor. float_precision : tf.dtype, optional The tensorflow dtype describing the float precision to use. + seed : int, optional + Seed for the random number generator. Returns ------- @@ -166,15 +193,14 @@ def new_biases( """ return tf.Variable( tf.random.truncated_normal( - shape=[length], stddev=stddev, dtype=float_precision + shape=[length], + stddev=stddev, + dtype=float_precision, + seed=seed, ), name=name, dtype=float_precision, ) - # return tf.Variable(tf.random_normal(shape=[length], - # stddev=2.0/length, - # dtype=float_precision), - # name=name, dtype=float_precision) def create_conv_nd_layers_weights( @@ -183,6 +209,7 @@ def create_conv_nd_layers_weights( num_filters_list, name="conv_{}d", float_precision=FLOAT_PRECISION, + seed=None, ): """Create weights and biases for conv 3d layers @@ -209,12 +236,16 @@ def create_conv_nd_layers_weights( Name of weights and biases. float_precision : tf.dtype, optional The tensorflow dtype describing the float precision to use. + seed : int, optional + Seed for the random number generator. 
Returns ------- list of tf.Tensor, list of tf.Tensor Returns the list of weight and bias tensors for each layer """ + # create seed counter + cnt = SeedCounter(seed) num_dims = len(filter_size_list[0]) name = name.format(num_dims) @@ -233,10 +264,12 @@ def create_conv_nd_layers_weights( weight_name = "weights_{}_{:03d}".format(name, i) bias_name = "biases_{}_{:03d}".format(name, i) - # weights_list.append(new_kernel_weights(shape=shape, name=weight_name)) weights_list.append( new_weights( - shape=shape, name=weight_name, float_precision=float_precision + shape=shape, + name=weight_name, + float_precision=float_precision, + seed=cnt(), ) ) biases_list.append( @@ -244,6 +277,7 @@ def create_conv_nd_layers_weights( length=num_filters, name=bias_name, float_precision=float_precision, + seed=cnt(), ) ) @@ -259,6 +293,7 @@ def create_fc_layers_weights( max_out_size_list=None, name="fc", float_precision=FLOAT_PRECISION, + seed=None, ): """ Create weights and biases for @@ -277,12 +312,17 @@ def create_fc_layers_weights( Name of weights and biases. float_precision : tf.dtype, optional The tensorflow dtype describing the float precision to use. + seed : int, optional + Seed for the random number generator. 
Returns ------- list of tf.Tensor, list of tf.Tensor Returns the list of weight and bias tensors for each layer """ + # create seed counter + cnt = SeedCounter(seed) + # create max out array if max_out_size_list is None: max_out_size_list = [None for i in range(len(fc_sizes))] @@ -301,6 +341,7 @@ def create_fc_layers_weights( shape=[num_inputs, num_outputs], name=weight_name, float_precision=float_precision, + seed=cnt(), ) ) biases_list.append( @@ -308,6 +349,7 @@ def create_fc_layers_weights( length=num_outputs, name=bias_name, float_precision=float_precision, + seed=cnt(), ) ) From 6acd74d49a68ec6f160dab19897ccd4f881058c7 Mon Sep 17 00:00:00 2001 From: mhuen Date: Sun, 21 Apr 2024 15:03:51 +0200 Subject: [PATCH 2/2] Set seed to dropout --- tfscripts/compat/v1/layers.py | 6 +++--- tfscripts/layers.py | 9 ++++++--- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/tfscripts/compat/v1/layers.py b/tfscripts/compat/v1/layers.py index a2bdeec..584ba05 100644 --- a/tfscripts/compat/v1/layers.py +++ b/tfscripts/compat/v1/layers.py @@ -589,7 +589,7 @@ def new_conv_nd_layer( raise NotImplementedError("Only supported 2d, 3d, 4d!") if use_dropout: - layer = tf.nn.dropout(layer, rate=1 - (keep_prob)) + layer = tf.nn.dropout(layer, rate=1 - (keep_prob), seed=cnt()) return layer, weights, biases @@ -707,7 +707,7 @@ def new_fc_layer( ) if use_dropout: - layer = tf.nn.dropout(layer, rate=1 - (keep_prob)) + layer = tf.nn.dropout(layer, rate=1 - (keep_prob), seed=seed) return layer, weights, biases @@ -835,7 +835,7 @@ def new_channel_wise_fc_layer( ) if use_dropout: - layer = tf.nn.dropout(layer, rate=1 - (keep_prob)) + layer = tf.nn.dropout(layer, rate=1 - (keep_prob), seed=seed) return layer, weights, biases diff --git a/tfscripts/layers.py b/tfscripts/layers.py index 8314df7..715b67a 100644 --- a/tfscripts/layers.py +++ b/tfscripts/layers.py @@ -603,6 +603,7 @@ def temp_func(inputs): self.method = method self.repair_std_deviation = repair_std_deviation 
self.float_precision = float_precision + self.seed = seed # get shape of output # todo: figure out better way to obtain this @@ -717,7 +718,7 @@ def __call__(self, inputs, is_training, keep_prob=None): layer = self._apply_pooling(layer) if self.use_dropout and is_training: - layer = tf.nn.dropout(layer, 1 - keep_prob) + layer = tf.nn.dropout(layer, 1 - keep_prob, seed=self.seed) return layer @@ -911,6 +912,7 @@ def __init__( self.use_dropout = use_dropout self.repair_std_deviation = repair_std_deviation self.float_precision = float_precision + self.seed = seed def __call__(self, inputs, is_training, keep_prob): """Apply Module. @@ -979,7 +981,7 @@ def __call__(self, inputs, is_training, keep_prob): layer = self.residual_add(input=inputs, residual=layer) if self.use_dropout and is_training: - layer = tf.nn.dropout(layer, 1 - keep_prob) + layer = tf.nn.dropout(layer, 1 - keep_prob, seed=self.seed) return layer @@ -1123,6 +1125,7 @@ def __init__( self.use_dropout = use_dropout self.repair_std_deviation = repair_std_deviation self.float_precision = float_precision + self.seed = seed def __call__(self, inputs, is_training, keep_prob): """Apply Module. @@ -1205,7 +1208,7 @@ def __call__(self, inputs, is_training, keep_prob): ) if self.use_dropout and is_training: - layer = tf.nn.dropout(layer, 1 - keep_prob) + layer = tf.nn.dropout(layer, 1 - keep_prob, seed=self.seed) return layer