
Merge pull request #15 from icecube/RandomSeeds
Expose seed parameter to weight initialization and dropout
mhuen authored Apr 21, 2024
2 parents 6e444ae + 6acd74d commit 07d88bc
Showing 18 changed files with 644 additions and 211 deletions.
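The change threads an optional seed keyword from the public layer and activation helpers down to the weight initialization (and, elsewhere in the diff, to dropout), so a network can be rebuilt with identical starting values. The initializer itself is not among the files shown on this page; the following is only a minimal sketch of what a seed-aware new_weights helper could look like, with the signature and defaults assumed rather than copied from the repository:

import tensorflow as tf

FLOAT_PRECISION = tf.float32  # assumed; tfscripts defines its own precision constant


def new_weights(shape, stddev=1.0, name="weights", seed=None):
    """Sketch of a weight Variable drawn from a seeded truncated normal."""
    init = tf.random.truncated_normal(
        shape, stddev=stddev, dtype=FLOAT_PRECISION, seed=seed
    )
    return tf.Variable(init, name=name)

With a fixed integer seed, two calls with the same shape produce the same initial values; with seed=None the previous, non-deterministic behaviour is preserved.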
25 changes: 14 additions & 11 deletions tfscripts/compat/v1/conv.py
@@ -239,6 +239,7 @@ def locally_connected_2d(
strides=[1, 1],
padding="SAME",
dilation_rate=None,
seed=None,
):
"""
Like conv2d, but doesn't share weights.
@@ -261,10 +262,11 @@ def locally_connected_2d(
The stride of the sliding window for each dimension of input.
padding : A string from: "SAME", "VALID".
The type of padding algorithm to use.
dilation_rate : None or list of int of length 2
[dilation in x, dilation in y]
defines the dilation rate to be used
seed : None or int
Seed for the random number generator.
Returns
-------
@@ -316,7 +318,9 @@ def locally_connected_2d(
# fast shortcut
if list(filter_size) == [1, 1]:
if kernel is None:
kernel = new_weights(shape=input_shape[1:] + [num_outputs])
kernel = new_weights(
shape=input_shape[1:] + [num_outputs], seed=seed
)
output = tf.reduce_sum(
input_tensor=tf.expand_dims(input, axis=4) * kernel, axis=3
)
@@ -412,7 +416,7 @@ def locally_connected_2d(
# get kernel
# ------------------
if kernel is None:
kernel = new_weights(shape=kernel_shape)
kernel = new_weights(shape=kernel_shape, seed=seed)

# ------------------
# perform convolution
@@ -431,6 +435,7 @@ def locally_connected_3d(
strides=[1, 1, 1],
padding="SAME",
dilation_rate=None,
seed=None,
):
"""
Like conv3d, but doesn't share weights.
@@ -456,6 +461,8 @@ def locally_connected_3d(
dilation_rate : None or list of int of length 3
[dilation in x, dilation in y, dilation in z]
defines the dilation rate to be used
seed : None or int
Seed for the random number generator.
Returns
-------
@@ -507,7 +514,9 @@ def locally_connected_3d(
# fast shortcut
if list(filter_size) == [1, 1, 1]:
if kernel is None:
kernel = new_weights(shape=input_shape[1:] + [num_outputs])
kernel = new_weights(
shape=input_shape[1:] + [num_outputs], seed=seed
)
output = tf.reduce_sum(
input_tensor=tf.expand_dims(input, axis=5) * kernel, axis=4
)
@@ -628,7 +637,7 @@ def locally_connected_3d(
# get kernel
# ------------------
if kernel is None:
kernel = new_weights(shape=kernel_shape)
kernel = new_weights(shape=kernel_shape, seed=seed)

# ------------------
# perform convolution
@@ -663,30 +672,24 @@ def local_translational3d_trafo(
float32, float64, int64, int32, uint8, uint16, int16, int8, complex64,
complex128, qint8, quint8, qint32, half.
Shape [batch, in_depth, in_height, in_width, in_channels].
num_outputs : int
Number of output channels
filter_size : list of int of size 3
[filter x size, filter y size, filter z size]
fcn : callable: fcn(input_patch)
Defines the transformation:
input_patch -> output
with output.shape = [-1, num_outputs]
weights : None, optional
Description
strides : A list of ints that has length >= 5. 1-D tensor of length 5.
The stride of the sliding window for each dimension of input.
Must have strides[0] = strides[4] = 1.
padding : A string from: "SAME", "VALID".
The type of padding algorithm to use.
dilation_rate : None or list of int of length 3
[dilation in x, dilation in y, dilation in z]
defines the dilation rate to be used
is_training : bool, optional
Indicates whether currently in training or inference mode.
True: in training mode
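In conv.py the new keyword is passed straight through to the kernel initialization of the locally connected layers. A usage sketch follows; the import path and the leading positional parameters (input, num_outputs, filter_size) are assumptions based on the docstrings, and the compat.v1 helpers are TF1-style, so graph mode is enabled first:

import tensorflow as tf
from tfscripts.compat.v1 import conv  # assumed import path

tf.compat.v1.disable_eager_execution()  # compat/v1 helpers build TF1-style graphs

# toy input: [batch, height, width, channels]
x = tf.compat.v1.placeholder(tf.float32, shape=[None, 8, 8, 3])

# Two layers built with the same seed start from identical kernels,
# which makes training runs reproducible and comparisons fair.
y1 = conv.locally_connected_2d(x, num_outputs=16, filter_size=[3, 3], seed=42)
y2 = conv.locally_connected_2d(x, num_outputs=16, filter_size=[3, 3], seed=42)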
26 changes: 17 additions & 9 deletions tfscripts/compat/v1/core.py
@@ -17,7 +17,12 @@


def add_residual(
input, residual, strides=None, use_scale_factor=True, scale_factor=0.001
input,
residual,
strides=None,
use_scale_factor=True,
scale_factor=0.001,
seed=None,
):
"""Convenience function to add a residual
@@ -39,6 +44,8 @@ def add_residual(
scale_factor : float, optional
Defines how much the residuals will be scaled prior to addition if
use_scale_factor is True.
seed : int, optional
Seed for the random number generator.
Returns
-------
@@ -78,7 +85,7 @@ def add_residual(
# Residuals added over multiple layers accumulate.
# A scale factor < 1 reduces instabilities in beginning
if use_scale_factor:
scale = new_weights([num_outputs], stddev=scale_factor)
scale = new_weights([num_outputs], stddev=scale_factor, seed=seed)
residual = residual * scale
if num_inputs == num_outputs:
output = residual + input
@@ -115,6 +122,7 @@ def activation(
use_batch_normalisation=False,
is_training=None,
verbose=True,
seed=None,
):
"""
Helper function to perform activation on a layer
@@ -218,16 +226,16 @@ def activation(
)

elif activation_type == "prelu":
slope = new_weights(layer.get_shape().as_list()[1:]) + 1.0
slope = new_weights(layer.get_shape().as_list()[1:], seed=seed) + 1.0
layer = tf.where(
tf.less(layer, tf.constant(0, dtype=FLOAT_PRECISION)),
layer * slope,
layer,
)

elif activation_type == "pelu":
a = new_weights(layer.get_shape().as_list()[1:]) + 1.0
b = new_weights(layer.get_shape().as_list()[1:]) + 1.0
a = new_weights(layer.get_shape().as_list()[1:], seed=seed) + 1.0
b = new_weights(layer.get_shape().as_list()[1:], seed=seed + 1) + 1.0
layer = tf.where(
tf.less(layer, tf.constant(0, dtype=FLOAT_PRECISION)),
(tf.exp(layer / b) - 1) * a,
@@ -238,10 +246,10 @@
layer = tf.exp(-tf.square(layer))

elif activation_type == "pgaussian":
sigma = new_weights(layer.get_shape().as_list()[1:]) + tf.constant(
1.0, dtype=FLOAT_PRECISION
)
mu = new_weights(layer.get_shape().as_list()[1:])
sigma = new_weights(
layer.get_shape().as_list()[1:], seed=seed
) + tf.constant(1.0, dtype=FLOAT_PRECISION)
mu = new_weights(layer.get_shape().as_list()[1:], seed=seed + 1)
layer = tf.exp(
tf.square((layer - mu) / sigma)
* tf.constant(-0.5, dtype=FLOAT_PRECISION)
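In core.py the seed also reaches the learned activation parameters: the parametric variants (pelu, pgaussian) draw their second parameter tensor from seed + 1, so those code paths expect an integer seed rather than the default None when reproducibility is wanted. A usage sketch under the same assumptions as above (module path and the leading positional arguments layer and activation_type are not confirmed by this page):

import tensorflow as tf
from tfscripts.compat.v1 import core  # assumed import path

tf.compat.v1.disable_eager_execution()

x = tf.compat.v1.placeholder(tf.float32, shape=[None, 32])

# "pelu" creates two parameter tensors; with seed=123 they are drawn from
# seeds 123 and 124, so rebuilding the graph reproduces the same init.
out = core.activation(x, "pelu", seed=123)

# The residual helper seeds its scale factor the same way.
res = core.add_residual(x, residual=out, seed=123)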
