diff --git a/autograd/misc/optimizers.py b/autograd/misc/optimizers.py
index 54ebc02c..e0bedd9a 100644
--- a/autograd/misc/optimizers.py
+++ b/autograd/misc/optimizers.py
@@ -36,7 +36,9 @@ def sgd(grad, x, callback=None, num_iters=200, step_size=0.1, mass=0.9):
     velocity = np.zeros(len(x))
     for i in range(num_iters):
         g = grad(x, i)
-        if callback: callback(x, i, g)
+        if callback:
+            converged = callback(x, i, g)
+            if converged: break
         velocity = mass * velocity - (1.0 - mass) * g
         x = x + step_size * velocity
     return x
@@ -48,7 +50,9 @@ def rmsprop(grad, x, callback=None, num_iters=100,
     avg_sq_grad = np.ones(len(x))
     for i in range(num_iters):
         g = grad(x, i)
-        if callback: callback(x, i, g)
+        if callback:
+            converged = callback(x, i, g)
+            if converged: break
         avg_sq_grad = avg_sq_grad * gamma + g**2 * (1 - gamma)
         x = x - step_size * g/(np.sqrt(avg_sq_grad) + eps)
     return x
@@ -62,7 +66,9 @@ def adam(grad, x, callback=None, num_iters=100,
     v = np.zeros(len(x))
     for i in range(num_iters):
         g = grad(x, i)
-        if callback: callback(x, i, g)
+        if callback:
+            converged = callback(x, i, g)
+            if converged: break
         m = (1 - b1) * g      + b1 * m  # First  moment estimate.
         v = (1 - b2) * (g**2) + b2 * v  # Second moment estimate.
         mhat = m / (1 - b1**(i + 1))    # Bias correction.
diff --git a/examples/optimizer_convergence.py b/examples/optimizer_convergence.py
new file mode 100644
index 00000000..7293259e
--- /dev/null
+++ b/examples/optimizer_convergence.py
@@ -0,0 +1,46 @@
+from __future__ import absolute_import
+from __future__ import print_function
+from builtins import range
+import autograd.numpy as np
+from autograd import grad
+from autograd.misc.optimizers import adam
+
+def sigmoid(x):
+    return 0.5*(np.tanh(x) + 1)
+
+def logistic_predictions(weights, inputs):
+    # Outputs probability of a label being true according to logistic model.
+    return sigmoid(np.dot(inputs, weights))
+
+def training_loss(weights, t=None):
+    # Training loss is the negative log-likelihood of the training labels.
+    preds = logistic_predictions(weights, inputs)
+    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
+    return -np.sum(np.log(label_probabilities))
+
+def callback(params, t, g):
+    # Returning True from the callback tells the optimizer to stop early.
+    tol = 1e-5
+    converged = False
+
+    # If the gradient is very small, treat the optimization as converged.
+    # Other convergence criteria could be used here instead.
+    if np.sum(np.abs(g)) < tol:
+        print("Model converged in iteration {}".format(t))
+        converged = True
+
+    return converged
+
+# Build a toy dataset.
+inputs = np.random.randn(100, 5)
+targets = np.random.randint(0, 2, (100, ))
+
+# Build a function that returns gradients of training loss using autograd.
+training_gradient_fun = grad(training_loss)
+
+weights = np.zeros((5, ))
+
+# Optimize weights using Adam.
+print("Initial loss:", training_loss(weights))
+weights = adam(training_gradient_fun, weights, step_size=0.1, num_iters=500, callback=callback)
+print("Trained loss:", training_loss(weights))
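
As a quick sanity check of the new behaviour, here is a minimal usage sketch (not part of the patch): the quadratic objective, the stop_when_flat callback, its 1e-6 tolerance, and the starting point are illustrative choices, not taken from this diff. Any callback that returns True now stops the patched optimizers before num_iters is reached; one that returns False or None preserves the old behaviour.

import autograd.numpy as np
from autograd import grad
from autograd.misc.optimizers import sgd

def objective(x, t):
    # Simple convex bowl with its minimum at the origin.
    return np.sum(x**2)

def stop_when_flat(x, t, g):
    # Hypothetical convergence test: stop once the gradient is numerically flat.
    return np.linalg.norm(g) < 1e-6

x0 = np.array([3.0, -2.0])
x_opt = sgd(grad(objective), x0, callback=stop_when_flat, num_iters=1000)
print(x_opt)  # close to [0., 0.]; the callback typically fires well before 1000 iterations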