tensorflow · JimClarke5 · Oct 8, 2020 · Oct 27, 2020 · Nov 17, 2020 · Nov 19, 2020
diff --git a/tensorflow-core/tensorflow-core-api/src/gen/annotations/org/tensorflow/op/Ops.java b/tensorflow-core/tensorflow-core-api/src/gen/annotations/org/tensorflow/op/Ops.java
@@ -354,20 +354,20 @@ public final class Ops {
 
   public final SparseOps sparse;
 
-  public final TpuOps tpu;
-
   public final BitwiseOps bitwise;
 
+  public final TpuOps tpu;
+
   public final MathOps math;
 
   public final AudioOps audio;
 
   public final SignalOps signal;
 
-  public final TrainOps train;
-
   public final QuantizationOps quantization;
 
+  public final TrainOps train;
+
   private final Scope scope;
 
   private Ops(Scope scope) {
@@ -385,13 +385,13 @@ private Ops(Scope scope) {
     random = new RandomOps(this);
     strings = new StringsOps(this);
     sparse = new SparseOps(this);
-    tpu = new TpuOps(this);
     bitwise = new BitwiseOps(this);
+    tpu = new TpuOps(this);
     math = new MathOps(this);
     audio = new AudioOps(this);
     signal = new SignalOps(this);
-    train = new TrainOps(this);
     quantization = new QuantizationOps(this);
+    train = new TrainOps(this);
   }
 
   /**

diff --git a/tensorflow-framework/src/main/java/org/tensorflow/framework/activations/Activation.java b/tensorflow-framework/src/main/java/org/tensorflow/framework/activations/Activation.java
@@ -18,14 +18,7 @@
 import org.tensorflow.op.Ops;
 import org.tensorflow.types.family.TNumber;
 
-/**
- * Abstract base class for Activations
- *
- * <p><b>Note:</b> The {@link #tf} attribute must be set prior to invoking the call method. See
- * {@link #setTF(Ops)} and the constructor {@link #Activation(Ops)}.
- *
- * @param <T> the data type of the activation
- */
+/** Abstract base class for Activations */
 public abstract class Activation<T extends TNumber> {
 
   /** The TensorFlow Ops */
@@ -41,28 +34,29 @@ protected Activation(Ops tf) {
   }
 
   /**
-   * Sets the TensorFlow Ops
+   * Gets the TensorFlow Ops
    *
-   * @param tf the TensorFlow Ops
+   * @return the TensorFlow Ops
    */
-  protected void setTF(Ops tf) {
-    this.tf = tf;
+  protected Ops getTF() {
+    return this.tf;
   }
 
   /**
-   * Gets the TensorFlow Ops
+   * Sets the TensorFlow Ops
    *
-   * @return the TensorFlow Ops
+   * @param tf the TensorFlow Ops
    */
-  protected Ops getTF() {
-    return this.tf;
+  protected void setTF(Ops tf) {
+    this.tf = tf;
   }
 
   /**
    * Gets the calculation operation for the activation.
    *
    * @param input the input tensor
+   * @param <U> the data type of the input and result
    * @return The operand for the activation
    */
-  public abstract Operand<T> call(Operand<T> input);
+  public abstract <U extends T> Operand<U> call(Operand<U> input);
 }
diff --git a/tensorflow-framework/src/main/java/org/tensorflow/framework/activations/ELU.java b/tensorflow-framework/src/main/java/org/tensorflow/framework/activations/ELU.java
@@ -44,11 +44,10 @@
  *     Operand&lt;TFloat32&gt; result = elu.call(input);
  * </pre>
  *
- * @param <T> the data type of the activation
  * @see <a href="https://arxiv.org/abs/1511.07289">Clevert et al, 2016, Fast and Accurate Deep
  *     Network Learning by Exponential Linear Units (ELUs)</a>
  */
-public class ELU<T extends TFloating> extends Activation<T> {
+public class ELU extends Activation<TFloating> {
 
   private static final double ALPHA_DEFAULT = 1.0;
 
@@ -76,20 +75,16 @@ public ELU(Ops tf, double alpha) {
     this.alpha = alpha;
   }
 
-  /**
-   * Gets the calculation operation for the activation.
-   *
-   * @param input the input tensor
-   * @return The operand for the activation
-   */
+  /** {@inheritDoc} */
   @Override
-  public Operand<T> call(Operand<T> input) {
+  public <U extends TFloating> Operand<U> call(Operand<U> input) {
 
-    Operand<T> result = tf.nn.elu(input);
-    if (alpha == 1.0) return result;
-    else {
-      Class<T> inputType = input.type();
-      Operand<T> y = tf.math.mul(result, tf.dtypes.cast(tf.constant(alpha), inputType));
+    Operand<U> result = tf.nn.elu(input);
+    if (alpha == 1.0) {
+      return result;
+    } else {
+      Class<U> inputType = input.type();
+      Operand<U> y = tf.math.mul(result, tf.dtypes.cast(tf.constant(alpha), inputType));
       Operand<TBool> cond = tf.math.greater(result, tf.dtypes.cast(tf.constant(0), inputType));
       return tf.select(cond, result, y);
     }

diff --git a/tensorflow-framework/src/main/java/org/tensorflow/framework/activations/Exponential.java b/tensorflow-framework/src/main/java/org/tensorflow/framework/activations/Exponential.java
@@ -30,10 +30,8 @@
  *   Operand&lt;TFloat32&gt; result = exp.call(input);
  *   // result is [0.04978707f,  0.36787945f,  1.f,  2.7182817f, 20.085537f]
  * </pre>
- *
- * @param <T> the data type of the activation
  */
-public class Exponential<T extends TFloating> extends Activation<T> {
+public class Exponential extends Activation<TFloating> {
 
   /**
    * Creates an Exponential activation.
@@ -48,10 +46,11 @@ public Exponential(Ops tf) {
    * Calculates the Exponential activation.
    *
    * @param input the input tensor
+   * @param <U> the data type of the input and result
    * @return an Operand for the exponential activation: <code>exp(x)</code>.
    */
   @Override
-  public Operand<T> call(Operand<T> input) {
+  public <U extends TFloating> Operand<U> call(Operand<U> input) {
     return tf.math.exp(input);
   }
 }
diff --git a/tensorflow-framework/src/main/java/org/tensorflow/framework/activations/GeLU.java b/tensorflow-framework/src/main/java/org/tensorflow/framework/activations/GeLU.java
@@ -0,0 +1,122 @@
+/* Copyright 2020 The TensorFlow Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+=======================================================================*/
+package org.tensorflow.framework.activations;
+
+import org.tensorflow.Operand;
+import org.tensorflow.op.Ops;
+import org.tensorflow.types.family.TFloating;
+
+import static org.tensorflow.framework.utils.CastHelper.cast;
+
+/**
+ * Gaussian error linear unit (GELU) activation function.
+ *
+ * <p>GELU computes {@code x * P(X <= x)}, where {@code X ~ N(0, 1)}, so inputs are weighted by
+ * their value rather than gated by their sign as in ReLU.
+ *
+ * <p>If <code>approximate</code> is <code>true</code>, the tanh approximation is used:
+ *
+ * <pre>
+ *     0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))
+ * </pre>
+ *
+ * <p>If <code>approximate</code> is <code>false</code>, the exact form is used:
+ *
+ * <pre>
+ *     x * P(X &lt;= x) = 0.5 * x * (1 + erf(x / sqrt(2)))
+ * </pre>
+ *
+ * @see <a href="https://arxiv.org/abs/1606.08415">Hendrycks, Dan and Gimpel, Kevin, 2016-2020,
+ *     Gaussian Error Linear Units (GELUs)</a>
+ */
+public class GeLU extends Activation<TFloating> {
+
+  /** Value of {@code sqrt(2 / pi)}, the tanh scale factor of the approximate form. */
+  private static final double SQRT_TWO_OVER_PI = 0.7978845608028654;
+
+  /** Value of {@code sqrt(2)}, the erf divisor of the exact form. */
+  private static final double SQRT_TWO = 1.4142135623730951;
+
+  /** Weight of the cubic term in the approximate form. */
+  private static final double CUBIC_COEFFICIENT = 0.044715;
+
+  private final boolean approximate;
+
+  /**
+   * Creates a Gaussian error linear unit (GELU) activation that uses the exact (erf) form.
+   *
+   * @param tf the TensorFlow Ops
+   */
+  public GeLU(Ops tf) {
+    this(tf, false);
+  }
+
+  /**
+   * Creates a Gaussian error linear unit (GELU) activation.
+   *
+   * @param tf the TensorFlow Ops
+   * @param approximate whether to use the tanh approximation instead of the exact erf form
+   */
+  public GeLU(Ops tf, boolean approximate) {
+    super(tf);
+    this.approximate = approximate;
+  }
+
+  /** {@inheritDoc} */
+  @Override
+  public <U extends TFloating> Operand<U> call(Operand<U> input) {
+    return approximate ? tanhApproximation(input) : exactForm(input);
+  }
+
+  /**
+   * Builds {@code 0.5 * x * (1 + tanh(sqrt(2 / pi) * (x + 0.044715 * x^3)))}.
+   *
+   * @param input the input tensor
+   * @param <U> the data type of the input and result
+   * @return the operand for the approximate GELU
+   */
+  private <U extends TFloating> Operand<U> tanhApproximation(Operand<U> input) {
+    Class<U> type = input.type();
+    Operand<U> coeff = cast(tf, tf.constant(CUBIC_COEFFICIENT), type);
+    Operand<U> half = cast(tf, tf.constant(0.5), type);
+    Operand<U> one = cast(tf, tf.constant(1.0), type);
+    Operand<U> tanhScale = cast(tf, tf.constant(SQRT_TWO_OVER_PI), type);
+    Operand<U> three = cast(tf, tf.constant(3), type);
+
+    // x + 0.044715 * x^3
+    Operand<U> polynomial = tf.math.add(input, tf.math.mul(coeff, tf.math.pow(input, three)));
+    // 1 + tanh(sqrt(2 / pi) * polynomial)
+    Operand<U> gate = tf.math.add(one, tf.math.tanh(tf.math.mul(tanhScale, polynomial)));
+    return tf.math.mul(half, tf.math.mul(input, gate));
+  }
+
+  /**
+   * Builds {@code 0.5 * x * (1 + erf(x / sqrt(2)))}.
+   *
+   * @param input the input tensor
+   * @param <U> the data type of the input and result
+   * @return the operand for the exact GELU
+   */
+  private <U extends TFloating> Operand<U> exactForm(Operand<U> input) {
+    Class<U> type = input.type();
+    Operand<U> half = cast(tf, tf.constant(0.5), type);
+    Operand<U> one = cast(tf, tf.constant(1), type);
+    Operand<U> sqrtTwo = cast(tf, tf.constant(SQRT_TWO), type);
+
+    // 1 + erf(x / sqrt(2))
+    Operand<U> gate = tf.math.add(one, tf.math.erf(tf.math.div(input, sqrtTwo)));
+    return tf.math.mul(half, tf.math.mul(input, gate));
+  }
+}
diff --git a/tensorflow-framework/src/main/java/org/tensorflow/framework/activations/HardSigmoid.java b/tensorflow-framework/src/main/java/org/tensorflow/framework/activations/HardSigmoid.java
@@ -16,7 +16,7 @@
 
 import org.tensorflow.Operand;
 import org.tensorflow.op.Ops;
-import org.tensorflow.types.family.TFloating;
+import org.tensorflow.types.family.TNumber;
 
 /**
  * Hard sigmoid activation.
@@ -40,10 +40,8 @@
  *     Operand&lt;TFloat32&gt; result = hardSigmoid.call(input);
  *     // result is [0.f , 0.3f, 0.5f, 0.7f, 1.f]
  * </pre>
- *
- * @param <T> the data type of the result
  */
-public class HardSigmoid<T extends TFloating> extends Activation<T> {
+public class HardSigmoid extends Activation<TNumber> {
 
   /**
    * Creates Hard sigmoid activation.
@@ -54,19 +52,14 @@ public HardSigmoid(Ops tf) {
     super(tf);
   }
 
-  /**
-   * Gets the calculation operation for the activation.
-   *
-   * @param input the input tensor
-   * @return The operand for the activation
-   */
+  /** {@inheritDoc} */
   @Override
-  public Operand<T> call(Operand<T> input) {
-    Class<T> inputType = input.type();
-    Operand<T> point2 = tf.dtypes.cast(tf.constant(0.2), inputType);
-    Operand<T> point5 = tf.dtypes.cast(tf.constant(0.5), inputType);
+  public <U extends TNumber> Operand<U> call(Operand<U> input) {
+    Class<U> inputType = input.type();
+    Operand<U> point2 = tf.dtypes.cast(tf.constant(0.2), inputType);
+    Operand<U> point5 = tf.dtypes.cast(tf.constant(0.5), inputType);
 
-    Operand<T> x = tf.math.add(tf.math.mul(input, point2), point5);
+    Operand<U> x = tf.math.add(tf.math.mul(input, point2), point5);
     return tf.clipByValue(
         x, tf.dtypes.cast(tf.constant(0), inputType), tf.dtypes.cast(tf.constant(1), inputType));
   }

diff --git a/tensorflow-framework/src/main/java/org/tensorflow/framework/activations/Linear.java b/tensorflow-framework/src/main/java/org/tensorflow/framework/activations/Linear.java
@@ -19,9 +19,9 @@
 import org.tensorflow.types.family.TNumber;
 
 /**
- * Linear activation function  (pass-through).
+ * Linear activation function (pass-through).
  *
- * <p>The linear activation returns its input. It is also known as the Identity activation function.</p>
+ * <p>The linear activation returns its input. It is also known as the Identity activation function.
  *
  * <p>For example:
  *
@@ -33,7 +33,7 @@
  *    // result is [-3.0f,-1.0f, 0.0f,1.0f,3.0f]
  * </pre>
  */
-public class Linear<U extends TNumber> extends Activation<U> {
+public class Linear extends Activation<TNumber> {
 
   /**
    * Creates a linear activation.
@@ -46,7 +46,7 @@ public Linear(Ops tf) {
 
   /** {@inheritDoc} */
   @Override
-  public Operand<U> call(Operand<U> input) {
+  public <U extends TNumber> Operand<U> call(Operand<U> input) {
     return input;
   }
 }