Optimizer exit criteria #405

Open · wants to merge 2 commits into master
autograd/misc/optimizers.py: 12 changes (9 additions, 3 deletions)
@@ -36,7 +36,9 @@ def sgd(grad, x, callback=None, num_iters=200, step_size=0.1, mass=0.9):
     velocity = np.zeros(len(x))
     for i in range(num_iters):
         g = grad(x, i)
-        if callback: callback(x, i, g)
+        if callback:
+            converged = callback(x, i, g)
+            if converged: break
         velocity = mass * velocity - (1.0 - mass) * g
         x = x + step_size * velocity
     return x
@@ -48,7 +50,9 @@ def rmsprop(grad, x, callback=None, num_iters=100,
     avg_sq_grad = np.ones(len(x))
     for i in range(num_iters):
         g = grad(x, i)
-        if callback: callback(x, i, g)
+        if callback:
+            converged = callback(x, i, g)
+            if converged: break
         avg_sq_grad = avg_sq_grad * gamma + g**2 * (1 - gamma)
         x = x - step_size * g/(np.sqrt(avg_sq_grad) + eps)
     return x
@@ -62,7 +66,9 @@ def adam(grad, x, callback=None, num_iters=100,
     v = np.zeros(len(x))
     for i in range(num_iters):
         g = grad(x, i)
-        if callback: callback(x, i, g)
+        if callback:
+            converged = callback(x, i, g)
+            if converged: break
         m = (1 - b1) * g + b1 * m       # First moment estimate.
         v = (1 - b2) * (g**2) + b2 * v  # Second moment estimate.
         mhat = m / (1 - b1**(i + 1))    # Bias correction.
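
With this change, a callback that returns a truthy value stops the optimizer early, while a callback that returns nothing (as existing callbacks do) leaves the behavior unchanged. A minimal usage sketch (illustrative, not part of this diff), assuming the patched sgd shown above; objective and stop_when_flat are hypothetical names chosen here:

import autograd.numpy as np
from autograd import grad
from autograd.misc.optimizers import sgd  # patched version from this PR

def objective(x, i):
    # Simple quadratic bowl; the iteration index i is unused.
    return np.sum(x**2)

def stop_when_flat(x, i, g):
    # Returning True asks the optimizer to break out of its loop early.
    return np.linalg.norm(g) < 1e-6

x0 = np.array([1.0, -2.0])
x_opt = sgd(grad(objective), x0, callback=stop_when_flat, num_iters=1000)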
examples/optimizer_convergence.py: 47 changes (47 additions, 0 deletions)
@@ -0,0 +1,47 @@
from __future__ import absolute_import
from __future__ import print_function
from builtins import range
import autograd.numpy as np
from autograd import grad
from autograd.misc.optimizers import adam
from autograd.test_util import check_grads

def sigmoid(x):
    return 0.5*(np.tanh(x) + 1)

def logistic_predictions(weights, inputs):
    # Outputs probability of a label being true according to logistic model.
    return sigmoid(np.dot(inputs, weights))

def training_loss(weights, t=None):
    # Training loss is the negative log-likelihood of the training labels.
    preds = logistic_predictions(weights, inputs)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    return -np.sum(np.log(label_probabilities))

def callback(params, t, g):
    # The return value of the callback indicates whether the optimizer has converged.
    tol = 1e-5
    converged = False

    # If the gradient becomes very small, the optimization has converged.
    # Other convergence criteria may be used as well (see the sketch after this example).
    if np.sum(np.abs(g)) < tol:
        print("Model converged in iteration {}".format(t))
        converged = True

    return converged

# Build a toy dataset.
inputs = np.random.randn(100, 5)
targets = np.random.randint(0, 2, (100, ))

# Build a function that returns gradients of training loss using autograd.
training_gradient_fun = grad(training_loss)

weights = np.zeros((5, ))

# Optimize weights using the Adam optimizer.
print("Initial loss:", training_loss(weights))
weights = adam(training_gradient_fun, weights, step_size=0.1, num_iters=500, callback=callback)
print("Trained loss:", training_loss(weights))