Optimizer exit criteria #405

Open · wants to merge 2 commits into master
autograd/misc/optimizers.py: 12 changes (9 additions, 3 deletions)
@@ -36,7 +36,9 @@ def sgd(grad, x, callback=None, num_iters=200, step_size=0.1, mass=0.9):
     velocity = np.zeros(len(x))
     for i in range(num_iters):
         g = grad(x, i)
-        if callback: callback(x, i, g)
+        if callback:
+            converged = callback(x, i, g)
+            if converged: break
         velocity = mass * velocity - (1.0 - mass) * g
         x = x + step_size * velocity
     return x
@@ -48,7 +50,9 @@ def rmsprop(grad, x, callback=None, num_iters=100,
     avg_sq_grad = np.ones(len(x))
     for i in range(num_iters):
         g = grad(x, i)
-        if callback: callback(x, i, g)
+        if callback:
+            converged = callback(x, i, g)
+            if converged: break
         avg_sq_grad = avg_sq_grad * gamma + g**2 * (1 - gamma)
         x = x - step_size * g/(np.sqrt(avg_sq_grad) + eps)
     return x
@@ -62,7 +66,9 @@ def adam(grad, x, callback=None, num_iters=100,
     v = np.zeros(len(x))
     for i in range(num_iters):
         g = grad(x, i)
-        if callback: callback(x, i, g)
+        if callback:
+            converged = callback(x, i, g)
+            if converged: break
         m = (1 - b1) * g + b1 * m       # First moment estimate.
         v = (1 - b2) * (g**2) + b2 * v  # Second moment estimate.
         mhat = m / (1 - b1**(i + 1))    # Bias correction.
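
With this change, a callback that returns a truthy value stops the optimizer early, while a callback that returns nothing (as existing callbacks do) leaves the behavior unchanged. A minimal usage sketch (illustrative, not part of this diff), assuming the patched sgd shown above; objective and stop_when_flat are hypothetical names chosen here:

import autograd.numpy as np
from autograd import grad
from autograd.misc.optimizers import sgd  # patched version from this PR

def objective(x, i):
    # Simple quadratic bowl; the iteration index i is unused.
    return np.sum(x**2)

def stop_when_flat(x, i, g):
    # Returning True asks the optimizer to break out of its loop early.
    return np.linalg.norm(g) < 1e-6

x0 = np.array([1.0, -2.0])
x_opt = sgd(grad(objective), x0, callback=stop_when_flat, num_iters=1000)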
examples/optimizer_convergence.py: 47 changes (47 additions, 0 deletions)
@@ -0,0 +1,47 @@
from __future__ import absolute_import
from __future__ import print_function
from builtins import range
import autograd.numpy as np
from autograd import grad
from autograd.misc.optimizers import adam
from autograd.test_util import check_grads

def sigmoid(x):
    return 0.5*(np.tanh(x) + 1)

def logistic_predictions(weights, inputs):
    # Outputs probability of a label being true according to logistic model.
    return sigmoid(np.dot(inputs, weights))

def training_loss(weights, t=None):
    # Training loss is the negative log-likelihood of the training labels.
    preds = logistic_predictions(weights, inputs)
    label_probabilities = preds * targets + (1 - preds) * (1 - targets)
    return -np.sum(np.log(label_probabilities))

def callback(params, t, g):
    # The return value of the callback indicates whether the optimizer has converged.
    tol = 1e-5
    converged = False

    # If the gradient becomes very small, the optimization has converged.
    # Other convergence criteria may be used as well (see the sketch after this example).
    if np.sum(np.abs(g)) < tol:
        print("Model converged in iteration {}".format(t))
        converged = True

    return converged

# Build a toy dataset.
inputs = np.random.randn(100, 5)
targets = np.random.randint(0, 2, (100, ))

# Build a function that returns gradients of training loss using autograd.
training_gradient_fun = grad(training_loss)

weights = np.zeros((5, ))

# Optimize weights using the Adam optimizer.
print("Initial loss:", training_loss(weights))
weights = adam(training_gradient_fun, weights, step_size=0.1, num_iters=500, callback=callback)
print("Trained loss:", training_loss(weights))