Commit

cross entropy

SermetPekin committed Dec 6, 2024
1 parent 2207999 commit fddfec3
Showing 8 changed files with 405 additions and 20 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -13,3 +13,4 @@ ignore*/
data/
FashionMNIST/
raw/
*.pth
42 changes: 42 additions & 0 deletions micrograd/cross_entropy.py
@@ -0,0 +1,42 @@
from typing import List

from micrograd.engine import Value
import math

class CrossEntropyLoss:
@staticmethod
def forward(logits: List[Value], target: int) -> Value:
"""
Computes CrossEntropyLoss for a single example.
:param logits: List of Value objects, raw outputs (logits) from the model.
:param target: Integer index of the true class.
:return: Loss Value.
"""
# Step 1: Compute the exponentials of the logits
exp_logits = [logit.exp() for logit in logits]

# Step 2: Compute the sum of the exponentials
sum_exp_logits = sum(exp_logits)

# Step 3: Compute the softmax probabilities
probs = [exp_logit / sum_exp_logits for exp_logit in exp_logits]

# Step 4: Compute the negative log-likelihood loss for the target class
loss = -probs[target].log()

return loss

@staticmethod
def batch_forward(batch_logits: List[List[Value]], batch_targets: List[int]) -> Value:
"""
Computes the average CrossEntropyLoss for a batch.
:param batch_logits: List of List[Value] for all samples in the batch.
:param batch_targets: List of true class indices for the batch.
:return: Average loss Value.
"""
batch_loss = sum(
CrossEntropyLoss.forward(logits, target) for logits, target in zip(batch_logits, batch_targets)
)
return batch_loss / len(batch_targets)
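
A minimal usage sketch for the new loss (illustrative only; it assumes Value.exp() already exists in this fork's engine, which cross_entropy.py relies on):

from micrograd.engine import Value
from micrograd.cross_entropy import CrossEntropyLoss

logits = [Value(2.0), Value(1.0), Value(0.1)]      # raw model outputs for one sample
loss = CrossEntropyLoss.forward(logits, target=0)  # -log(softmax(logits)[0])
loss.backward()                                    # gradients flow back into every logit
print(loss.data, [l.grad for l in logits])
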
11 changes: 11 additions & 0 deletions micrograd/engine.py
@@ -130,6 +130,17 @@ def _backward():

return out

def log(self) -> 'Value':
"""Logarithm is only defined for positive values."""
clamped_data = max(self.data, 1e-7)
out = Value(math.log(clamped_data), (self,), 'log')

def _backward():
self.grad += (1 / clamped_data) * out.grad

out._backward = _backward
return out

def backward(self) -> None:

# topological order all the children in the graph
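
The new Value.log clamps its input at 1e-7 so a zero or negative value cannot crash math.log, and its backward pass applies d/dx log(x) = 1/x. A quick gradient sanity check (illustrative sketch using the Value API above):

from micrograd.engine import Value

x = Value(2.0)
y = x.log()
y.backward()
print(y.data)  # ~0.6931, i.e. ln(2)
print(x.grad)  # ~0.5, i.e. 1 / x.data
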
5 changes: 3 additions & 2 deletions pyproject.toml
@@ -2,7 +2,7 @@
name = "micrograd"
version = "0.2.0"
authors = [
{ name = "Andrej Karpathy", email = "[email protected]" },

{ name = "Sermet Pekin", email = "[email protected]" },
]
description = "A tiny scalar-valued autograd engine with a small PyTorch-like neural network library on top."
@@ -13,6 +13,7 @@ dependencies = [
"black>=24.10.0",
"graphviz>=0.20.3",
"matplotlib>=3.7.5",
"pandas>=2.2.3",
"pytest>=8.3.4",
"ruff>=0.8.1",
"scikit-learn>=1.3.2",
@@ -23,7 +24,7 @@ dependencies = [

[project.urls]
Source = "https://github.com/SermetPekin/micrograd"
Original = "https://github.com/karpathy/micrograd"
Inspired-by = "https://github.com/karpathy/micrograd"


[build-system]
33 changes: 33 additions & 0 deletions test/test_crossEntropyLoss.py
@@ -0,0 +1,33 @@
import pytest
import torch
import torch.nn as nn
from micrograd.engine import Value
from micrograd.cross_entropy import CrossEntropyLoss  # the custom implementation introduced in this commit

def test_micrograd_vs_torch_cross_entropy():
# Define the logits and targets
logits_micrograd = [[Value(-1.0), Value(-2.0), Value(-3.0)],
[Value(0.5), Value(-1.5), Value(-0.5)]]
targets_micrograd = [0, 2]

# Torch equivalent tensors
logits_torch = torch.tensor([[-1.0, -2.0, -3.0],
[0.5, -1.5, -0.5]], dtype=torch.float32)
targets_torch = torch.tensor([0, 2], dtype=torch.long)

# Compute micrograd loss
loss_micrograd = CrossEntropyLoss.batch_forward(logits_micrograd, targets_micrograd)
micrograd_loss_value = loss_micrograd.data

# Compute torch loss
criterion = nn.CrossEntropyLoss()
loss_torch = criterion(logits_torch, targets_torch)
torch_loss_value = loss_torch.item()

# Print losses for debugging
print(f"Micrograd Loss: {micrograd_loss_value:.4f}")
print(f"Torch Loss: {torch_loss_value:.4f}")

# Assert that the losses are approximately equal
assert abs(micrograd_loss_value - torch_loss_value) < 1e-4, \
f"Losses do not match: Micrograd={micrograd_loss_value}, Torch={torch_loss_value}"
197 changes: 197 additions & 0 deletions torch_e2.py
@@ -0,0 +1,197 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor


def init_data():
def fnc(d: str):
dict_ = {
'Setosa': 0,
'Versicolor': 1,
'Virginica': 2,

}
return dict_.get(d, d)

url = "https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv"
df = pd.read_csv(url)
df['variety'] = df['variety'].apply(fnc)
return df


def process_data_for_torch(df):
X = df.drop('variety', axis=1)
y = df['variety']
X = X.values
y = y.values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_train = torch.FloatTensor(X_train)
X_test = torch.FloatTensor(X_test)
y_train = torch.LongTensor(y_train)
y_test = torch.LongTensor(y_test)
return X_train, X_test, y_train, y_test


class Model(nn.Module):
def __init__(self, in_feats: int = 4, out_feats: int = 3, hidden1=7, hidden2=7):
super(Model, self).__init__()
self.fc1 = nn.Linear(in_feats, hidden1)
self.fc2 = nn.Linear(hidden1, hidden2)
self.out = nn.Linear(hidden2, out_feats)

def forward(self, x):
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.out(x)
return x


def with_torch(save=True):
df = init_data()
X_train, X_test, y_train, y_test = process_data_for_torch(df)

model = Model()

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# model.parameters
epochs = 60
losses = []
for i in range(epochs):
y_pred = model.forward(X_train)
loss = criterion(y_pred, y_train)

losses.append(loss.detach().numpy())

if i % 10 == 0:
print(f'Epoch : {i} and loss : {loss}')

# backprop

optimizer.zero_grad()
loss.backward()
optimizer.step()

if save:
# Save the model
torch.save(model.state_dict(), "iris_model.pth")

with torch.no_grad():
y_eval = model.forward(X_test)
loss = criterion(y_eval, y_test).item()
accuracy = calculate_accuracy(y_eval, y_test)

print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy * 100:.2f}%")
return model, losses


def load_model():
# Load the model
loaded_model = Model()
loaded_model.load_state_dict(torch.load("iris_model.pth"))
loaded_model.eval()
return loaded_model


def calculate_accuracy(_y_pred, _y_true):
y_pred_classes = torch.argmax(_y_pred, axis=1) # Get the predicted class
acc = (y_pred_classes == _y_true).sum().item() / len(_y_true)
return acc


# with_torch()

def with_micrograd():
from micrograd import Value
from micrograd import MLP

# Define the Micrograd model
in_feats = 4 # Input features (Iris dataset)
hidden1 = 7 # Hidden layer 1
hidden2 = 7 # Hidden layer 2
out_feats = 3 # Output classes

model = MLP(in_feats, [hidden1, hidden2, out_feats])  # Equivalent to the PyTorch Model defined above
df = init_data()
X_train, X_test, y_train, y_test = process_data_for_torch(df)
# Hyperparameters
learning_rate = 0.01
epochs = 100
losses = []

# One-hot encode y_train
y_train_onehot = np.zeros((y_train.shape[0], out_feats))  # Initialize with zeros
y_train_onehot[np.arange(y_train.shape[0]), y_train] = 1  # Set the true class index to 1


# Training loop
for epoch in range(epochs):
epoch_loss = 0.0

for i in range(len(X_train)):
# Forward pass
inputs = [Value(x) for x in X_train[i]]
targets = [Value(y) for y in y_train_onehot[i]]
outputs = model(inputs)

# Calculate Cross-Entropy Loss
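# (this mirrors CrossEntropyLoss.forward from micrograd/cross_entropy.py, written out with one-hot targets)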
exp_outputs = [o.exp() for o in outputs]
sum_exp_outputs = sum(exp_outputs)
probs = [o / sum_exp_outputs for o in exp_outputs]
loss = -sum(t * p.log() for t, p in zip(targets, probs))

epoch_loss += loss.data

# Backpropagation
model.zero_grad() # Zero gradients
loss.backward()

# Update weights
for param in model.parameters():
param.data -= learning_rate * param.grad

losses.append(epoch_loss / len(X_train))
if epoch % 10 == 0:
print(f"Epoch {epoch}, Loss: {epoch_loss / len(X_train)}")


# Evaluation
correct = 0
total = len(X_test)

for i in range(len(X_test)):
inputs = [Value(x) for x in X_test[i]]
outputs = model(inputs)
predicted = np.argmax([o.data for o in outputs])
if predicted == y_test[i]:
correct += 1

accuracy = correct / total
print(f"Test Accuracy: {accuracy * 100:.2f}%")


# Plot the losses
plt.plot(range(epochs), losses, label='Micrograd Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Micrograd Loss Curve')
plt.legend()
plt.show()


with_micrograd()
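
As committed, the script runs only the micrograd path (with_torch() is commented out above and with_micrograd() runs at import time). A possible entry point, sketched on the assumption that both functions keep their current signatures, would replace the bare call:

if __name__ == "__main__":
    with_torch(save=False)   # PyTorch baseline on the same Iris data
    with_micrograd()         # micrograd version for comparison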
