forked from karpathy/micrograd
Commit fddfec3 (1 parent: 2207999)
Showing 8 changed files with 405 additions and 20 deletions.
.gitignore
@@ -13,3 +13,4 @@ ignore*/
data/
FashionMNIST/
raw/
*.pth
micrograd/cross_entropy.py (new file)
@@ -0,0 +1,42 @@
from typing import List

from micrograd.engine import Value
import math


class CrossEntropyLoss:
    @staticmethod
    def forward(logits: List[Value], target: int) -> Value:
        """
        Computes CrossEntropyLoss for a single example.
        :param logits: List of Value objects, raw outputs (logits) from the model.
        :param target: Integer index of the true class.
        :return: Loss Value.
        """
        # Step 1: Compute the exponentials of the logits
        exp_logits = [logit.exp() for logit in logits]

        # Step 2: Compute the sum of the exponentials
        sum_exp_logits = sum(exp_logits)

        # Step 3: Compute the softmax probabilities
        probs = [exp_logit / sum_exp_logits for exp_logit in exp_logits]

        # Step 4: Compute the negative log-likelihood loss for the target class
        loss = -probs[target].log()

        return loss

    @staticmethod
    def batch_forward(batch_logits: List[List[Value]], batch_targets: List[int]) -> Value:
        """
        Computes the average CrossEntropyLoss for a batch.
        :param batch_logits: List of List[Value] for all samples in the batch.
        :param batch_targets: List of true class indices for the batch.
        :return: Average loss Value.
        """
        batch_loss = sum(
            CrossEntropyLoss.forward(logits, target) for logits, target in zip(batch_logits, batch_targets)
        )
        return batch_loss / len(batch_targets)
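For context, a minimal usage sketch of the new loss (assuming the fork's Value exposes .data, .grad, exp(), log(), and backward(), as the code above requires):

# Sketch: single-example loss and gradients with the new CrossEntropyLoss
from micrograd.engine import Value
from micrograd.cross_entropy import CrossEntropyLoss

logits = [Value(2.0), Value(-1.0), Value(0.5)]      # raw scores for 3 classes
loss = CrossEntropyLoss.forward(logits, target=0)   # true class is index 0
loss.backward()

print(loss.data)                   # scalar loss value
print([l.grad for l in logits])    # per-logit gradients: softmax(logits) - one_hot(target)

# Batched version: average loss over several examples
batch = [[Value(2.0), Value(-1.0), Value(0.5)], [Value(0.0), Value(1.0), Value(-1.0)]]
mean_loss = CrossEntropyLoss.batch_forward(batch, [0, 1])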
pyproject.toml
@@ -2,7 +2,7 @@
name = "micrograd"
version = "0.2.0"
authors = [
    { name = "Andrej Karpathy", email = "[email protected]" },
    { name = "Sermet Pekin", email = "[email protected]" },
]
description = "A tiny scalar-valued autograd engine with a small PyTorch-like neural network library on top."
@@ -13,6 +13,7 @@ dependencies = [
    "black>=24.10.0",
    "graphviz>=0.20.3",
    "matplotlib>=3.7.5",
    "pandas>=2.2.3",
    "pytest>=8.3.4",
    "ruff>=0.8.1",
    "scikit-learn>=1.3.2",
@@ -23,7 +24,7 @@ dependencies = [

[project.urls]
Source = "https://github.com/SermetPekin/micrograd"
Original = "https://github.com/karpathy/micrograd"
Inspired-by = "https://github.com/karpathy/micrograd"


[build-system]
New test: micrograd CrossEntropyLoss vs. torch.nn.CrossEntropyLoss
@@ -0,0 +1,33 @@
import pytest
import torch
import torch.nn as nn
from micrograd.engine import Value
from micrograd.cross_entropy import CrossEntropyLoss  # the fork's custom implementation


def test_micrograd_vs_torch_cross_entropy():
    # Define the logits and targets
    logits_micrograd = [[Value(-1.0), Value(-2.0), Value(-3.0)],
                        [Value(0.5), Value(-1.5), Value(-0.5)]]
    targets_micrograd = [0, 2]

    # Torch equivalent tensors
    logits_torch = torch.tensor([[-1.0, -2.0, -3.0],
                                 [0.5, -1.5, -0.5]], dtype=torch.float32)
    targets_torch = torch.tensor([0, 2], dtype=torch.long)

    # Compute micrograd loss
    loss_micrograd = CrossEntropyLoss.batch_forward(logits_micrograd, targets_micrograd)
    micrograd_loss_value = loss_micrograd.data

    # Compute torch loss
    criterion = nn.CrossEntropyLoss()
    loss_torch = criterion(logits_torch, targets_torch)
    torch_loss_value = loss_torch.item()

    # Print losses for debugging
    print(f"Micrograd Loss: {micrograd_loss_value:.4f}")
    print(f"Torch Loss: {torch_loss_value:.4f}")

    # Assert that the losses are approximately equal
    assert abs(micrograd_loss_value - torch_loss_value) < 1e-4, \
        f"Losses do not match: Micrograd={micrograd_loss_value}, Torch={torch_loss_value}"
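As a sanity check on the expected value, cross-entropy here is just softmax followed by negative log-likelihood averaged over the batch, which can be reproduced with plain Python (a standalone sketch, independent of both libraries):

import math

def reference_cross_entropy(batch_logits, batch_targets):
    # softmax + negative log-likelihood, averaged over the batch
    total = 0.0
    for logits, target in zip(batch_logits, batch_targets):
        exps = [math.exp(z) for z in logits]
        total += -math.log(exps[target] / sum(exps))
    return total / len(batch_targets)

# Same inputs as the test above; micrograd and torch should both match this value.
print(reference_cross_entropy([[-1.0, -2.0, -3.0], [0.5, -1.5, -0.5]], [0, 2]))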
New example script: Iris classification with PyTorch and with micrograd
@@ -0,0 +1,197 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor


def init_data():
    def fnc(d: str):
        dict_ = {
            'Setosa': 0,
            'Versicolor': 1,
            'Virginica': 2,
        }
        return dict_.get(d, d)

    url = "https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv"
    df = pd.read_csv(url)
    df['variety'] = df['variety'].apply(fnc)
    return df


def process_data_for_torch(df):
    X = df.drop('variety', axis=1)
    y = df['variety']
    X = X.values
    y = y.values

    X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
    X_train = torch.FloatTensor(X_train)
    X_test = torch.FloatTensor(X_test)
    y_train = torch.LongTensor(y_train)
    y_test = torch.LongTensor(y_test)
    return X_train, X_test, y_train, y_test


class Model(nn.Module):
    def __init__(self, in_feats: int = 4, out_feats: int = 3, hidden1=7, hidden2=7):
        super(Model, self).__init__()
        self.fc1 = nn.Linear(in_feats, hidden1)
        self.fc2 = nn.Linear(hidden1, hidden2)
        self.out = nn.Linear(hidden2, out_feats)

    def forward(self, x):
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.out(x)
        return x


def with_torch(save=True):
    df = init_data()
    X_train, X_test, y_train, y_test = process_data_for_torch(df)

    model = Model()

    criterion = nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

    epochs = 60
    losses = []
    for i in range(epochs):
        y_pred = model.forward(X_train)
        loss = criterion(y_pred, y_train)

        losses.append(loss.detach().numpy())

        if i % 10 == 0:
            print(f'Epoch : {i} and loss : {loss}')

        # backprop
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    if save:
        # Save the model
        torch.save(model.state_dict(), "iris_model.pth")

    with torch.no_grad():
        y_eval = model.forward(X_test)
        loss = criterion(y_eval, y_test).item()
        accuracy = calculate_accuracy(y_eval, y_test)

    print(f"Test Loss: {loss}")
    print(f"Test Accuracy: {accuracy * 100:.2f}%")
    return model, losses


def load_model():
    # Load the model
    loaded_model = Model()
    loaded_model.load_state_dict(torch.load("iris_model.pth"))
    loaded_model.eval()
    return loaded_model


def calculate_accuracy(_y_pred, _y_true):
    y_pred_classes = torch.argmax(_y_pred, axis=1)  # Get the predicted class
    acc = (y_pred_classes == _y_true).sum().item() / len(_y_true)
    return acc


# with_torch()

def with_micrograd():
    from micrograd.engine import Value
    from micrograd.nn import MLP

    # Define the Micrograd model
    in_feats = 4   # Input features (Iris dataset)
    hidden1 = 7    # Hidden layer 1
    hidden2 = 7    # Hidden layer 2
    out_feats = 3  # Output classes

    model = MLP(in_feats, [hidden1, hidden2, out_feats])  # Equivalent to the PyTorch model above
    df = init_data()
    X_train, X_test, y_train, y_test = process_data_for_torch(df)

    # Hyperparameters
    learning_rate = 0.01
    epochs = 100
    losses = []

    # One-hot encode y_train
    y_train_onehot = np.zeros((y_train.shape[0], out_feats))         # Initialize with zeros
    y_train_onehot[np.arange(y_train.shape[0]), y_train.numpy()] = 1  # Set the appropriate index to 1

    # Training loop
    for epoch in range(epochs):
        epoch_loss = 0.0

        for i in range(len(X_train)):
            # Forward pass (plain floats keep the micrograd graph free of tensor objects)
            inputs = [Value(float(x)) for x in X_train[i]]
            targets = [Value(float(y)) for y in y_train_onehot[i]]
            outputs = model(inputs)

            # Calculate Cross-Entropy Loss: softmax followed by negative log-likelihood
            exp_outputs = [o.exp() for o in outputs]
            sum_exp_outputs = sum(exp_outputs)
            probs = [o / sum_exp_outputs for o in exp_outputs]
            loss = -sum(t * p.log() for t, p in zip(targets, probs))

            epoch_loss += loss.data

            # Backpropagation
            model.zero_grad()  # Zero gradients
            loss.backward()

            # Update weights (plain SGD step)
            for param in model.parameters():
                param.data -= learning_rate * param.grad

        losses.append(epoch_loss / len(X_train))
        if epoch % 10 == 0:
            print(f"Epoch {epoch}, Loss: {epoch_loss / len(X_train)}")

    # Evaluation
    correct = 0
    total = len(X_test)

    for i in range(len(X_test)):
        inputs = [Value(float(x)) for x in X_test[i]]
        outputs = model(inputs)
        predicted = np.argmax([o.data for o in outputs])
        if predicted == y_test[i]:
            correct += 1

    accuracy = correct / total
    print(f"Test Accuracy: {accuracy * 100:.2f}%")

    # Plot the losses
    plt.plot(range(epochs), losses, label='Micrograd Loss')
    plt.xlabel('Epochs')
    plt.ylabel('Loss')
    plt.title('Micrograd Loss Curve')
    plt.legend()
    plt.show()


with_micrograd()
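Once with_torch(save=True) has been run, the saved checkpoint can be reloaded for inference roughly as follows (a sketch building on load_model above; the sample measurements are illustrative):

# Sketch: reload the saved PyTorch model and classify a single flower
model = load_model()  # reads "iris_model.pth" written by with_torch()
sample = torch.FloatTensor([[5.1, 3.5, 1.4, 0.2]])  # sepal/petal measurements (illustrative)
with torch.no_grad():
    logits = model(sample)
predicted = int(torch.argmax(logits, dim=1))  # 0 = Setosa, 1 = Versicolor, 2 = Virginica
print(predicted)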