optimizers #23

Open
16shery opened this issue Jun 25, 2023 · 0 comments

16shery commented Jun 25, 2023

import numpy as np
from keras.datasets import mnist
from sklearn.model_selection import train_test_split

Load MNIST dataset

(X, y), (X_test, y_test) = mnist.load_data()

Subset data to use only class 0 and class 1

indices = np.logical_or(y == 0, y == 1)
X = X[indices]
y = y[indices]

Reshape images to 1D vectors

X = X.reshape(X.shape[0], -1)

Standardize dataset

mean = np.mean(X)
std = np.std(X)
X_std = (X - mean) / std

Split data into training and validation sets

X_train, X_val, y_train, y_val = train_test_split(X_std, y, test_size=0.2, random_state=42)

Define the sigmoid function

def sigmoid(z):
    return 1 / (1 + np.exp(-z))

learning_rate = 0.01
num_iterations = 1000
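
One practical detail: when the sigmoid output saturates at exactly 0 or 1, np.log in the cost below returns -inf/NaN. A minimal sketch of a clipped cross-entropy helper that could be reused in the loops that follow (the clip value 1e-12 is an arbitrary choice, not something taken from the code in this issue):

def cross_entropy(y_true, y_prob, eps=1e-12):
    # Clip predictions away from exact 0 and 1 so np.log stays finite
    y_prob = np.clip(y_prob, eps, 1 - eps)
    return -np.mean(y_true * np.log(y_prob) + (1 - y_true) * np.log(1 - y_prob))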

Use L1 regularization with gradient descent optimizer:

lambdas = [0.001, 0.01]

for l in lambdas:
    # Initialize the parameters
    w = np.zeros(X_train.shape[1])
    b = 0

    for i in range(num_iterations):
        # Forward pass
        z = np.dot(X_train, w) + b
        y_pred = sigmoid(z)

        # Compute the cost (cross-entropy plus L1 penalty)
        cost = -np.mean(y_train * np.log(y_pred) + (1 - y_train) * np.log(1 - y_pred)) + l * np.sum(np.abs(w))

        # Backward pass (subgradient of the L1 term is sign(w))
        dz = y_pred - y_train
        dw = np.dot(X_train.T, dz) / X_train.shape[0] + l * np.sign(w)
        db = np.mean(dz)

        # Update the parameters
        w = w - learning_rate * dw
        b = b - learning_rate * db

    # Evaluate the model on the validation data
    z_val = np.dot(X_val, w) + b
    y_val_pred = sigmoid(z_val)
    y_val_pred[y_val_pred >= 0.5] = 1
    y_val_pred[y_val_pred < 0.5] = 0
    accuracy = np.mean(y_val_pred == y_val)
    print("Lambda: {}, Validation accuracy: {}".format(l, accuracy))

Use mini-batch gradient descent optimizer:

batch_sizes = [128, 64]

Define the mini-batch generator function

def minibatch_generator(X, y, batch_size):
    num_samples = X.shape[0]
    indices = np.arange(num_samples)
    np.random.shuffle(indices)
    for start_idx in range(0, num_samples - batch_size + 1, batch_size):
        excerpt = indices[start_idx:start_idx + batch_size]
        yield X[excerpt], y[excerpt]

Train the logistic regression model using mini-batch gradient descent

for batch_size in batch_sizes:
    print(f"Batch size: {batch_size}")
    w = np.zeros(X_train.shape[1])
    b = 0
    for i in range(num_iterations):
        # Mini-batch generator (reshuffled every epoch)
        batch_generator = minibatch_generator(X_train, y_train, batch_size)

        for batch_X, batch_y in batch_generator:
            # Forward pass
            z = np.dot(batch_X, w) + b
            y_pred = sigmoid(z)

            # Compute the cost
            cost = -np.mean(batch_y * np.log(y_pred) + (1 - batch_y) * np.log(1 - y_pred))

            # Backward pass
            dz = y_pred - batch_y
            dw = np.dot(batch_X.T, dz) / batch_size
            db = np.mean(dz)

            # Update the parameters
            w = w - learning_rate * dw
            b = b - learning_rate * db

    # Evaluate the model on the validation data
    z_val = np.dot(X_val, w) + b
    y_val_pred = sigmoid(z_val)
    y_val_pred[y_val_pred >= 0.5] = 1
    y_val_pred[y_val_pred < 0.5] = 0
    accuracy = np.mean(y_val_pred == y_val)
    print("Validation accuracy:", accuracy)

# RMSprop optimizer
eps = 1e-8
beta = 0.9
w = np.zeros(X_train.shape[1])
b = 0
s_w = np.zeros(X_train.shape[1])
s_b = 0

for i in range(num_iterations):
    # Forward pass
    z = np.dot(X_train, w) + b
    y_pred = sigmoid(z)

    # Compute the cost
    cost = -np.mean(y_train * np.log(y_pred) + (1 - y_train) * np.log(1 - y_pred))

    # Backward pass
    dz = y_pred - y_train
    dw = np.dot(X_train.T, dz) / X_train.shape[0]
    db = np.mean(dz)

    # Update the RMSprop accumulators (running average of squared gradients)
    s_w = beta * s_w + (1 - beta) * np.square(dw)
    s_b = beta * s_b + (1 - beta) * np.square(db)

    # Update the parameters using the RMSprop rule
    w = w - learning_rate * dw / np.sqrt(s_w + eps)
    b = b - learning_rate * db / np.sqrt(s_b + eps)

Evaluate the model on the validation data

z_val = np.dot(X_val, w) + b
y_val_pred = sigmoid(z_val)
y_val_pred[y_val_pred >= 0.5] = 1
y_val_pred[y_val_pred < 0.5] = 0
accuracy = np.mean(y_val_pred == y_val)
print("Validation accuracy:", accuracy)

Adam optimizer:

Initialize Adam optimizer parameters

eps = 1e-8
beta1 = 0.9
beta2 = 0.999
w = np.zeros(X_train.shape[1])
b = 0
m_w = np.zeros(X_train.shape[1])
m_b = 0
v_w = np.zeros(X_train.shape[1])
v_b = 0

for i in range(num_iterations):
    # Forward pass
    z = np.dot(X_train, w) + b
    y_pred = sigmoid(z)

    # Compute the cost
    cost = -np.mean(y_train * np.log(y_pred) + (1 - y_train) * np.log(1 - y_pred))

    # Backward pass
    dz = y_pred - y_train
    dw = np.dot(X_train.T, dz) / X_train.shape[0]
    db = np.mean(dz)

    # Update the first- and second-moment estimates
    m_w = beta1 * m_w + (1 - beta1) * dw
    m_b = beta1 * m_b + (1 - beta1) * db
    v_w = beta2 * v_w + (1 - beta2) * np.square(dw)
    v_b = beta2 * v_b + (1 - beta2) * np.square(db)

    # Bias-correct the moment estimates
    m_w_hat = m_w / (1 - beta1**(i + 1))
    m_b_hat = m_b / (1 - beta1**(i + 1))
    v_w_hat = v_w / (1 - beta2**(i + 1))
    v_b_hat = v_b / (1 - beta2**(i + 1))

    # Update the parameters using the Adam rule
    w = w - learning_rate * m_w_hat / (np.sqrt(v_w_hat) + eps)
    b = b - learning_rate * m_b_hat / (np.sqrt(v_b_hat) + eps)

Evaluate the model on the validation data

z_val = np.dot(X_val, w) + b
y_val_pred = sigmoid(z_val)
y_val_pred[y_val_pred >= 0.5] = 1
y_val_pred[y_val_pred < 0.5] = 0
accuracy = np.mean(y_val_pred == y_val)
print("Validation accuracy:", accuracy)
