Commit

cross entropy

SermetPekin committed Dec 6, 2024
1 parent 2207999 commit fddfec3
Showing 8 changed files with 405 additions and 20 deletions.
1 change: 1 addition & 0 deletions .gitignore
@@ -13,3 +13,4 @@ ignore*/
data/
FashionMNIST/
raw/
*.pth
42 changes: 42 additions & 0 deletions micrograd/cross_entropy.py
@@ -0,0 +1,42 @@
from typing import List

from micrograd.engine import Value
import math

class CrossEntropyLoss:
@staticmethod
def forward(logits: List[Value], target: int) -> Value:
"""
Computes CrossEntropyLoss for a single example.
:param logits: List of Value objects, raw outputs (logits) from the model.
:param target: Integer index of the true class.
:return: Loss Value.
"""
# Step 1: Compute the exponentials of the logits
exp_logits = [logit.exp() for logit in logits]

# Step 2: Compute the sum of the exponentials
sum_exp_logits = sum(exp_logits)

# Step 3: Compute the softmax probabilities
probs = [exp_logit / sum_exp_logits for exp_logit in exp_logits]

# Step 4: Compute the negative log-likelihood loss for the target class
loss = -probs[target].log()

return loss

@staticmethod
def batch_forward(batch_logits: List[List[Value]], batch_targets: List[int]) -> Value:
"""
Computes the average CrossEntropyLoss for a batch.
:param batch_logits: List of List[Value] for all samples in the batch.
:param batch_targets: List of true class indices for the batch.
:return: Average loss Value.
"""
batch_loss = sum(
CrossEntropyLoss.forward(logits, target) for logits, target in zip(batch_logits, batch_targets)
)
return batch_loss / len(batch_targets)
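
A minimal usage sketch for the new loss (illustrative only; it assumes Value.exp() already exists in this fork's engine, which cross_entropy.py relies on):

from micrograd.engine import Value
from micrograd.cross_entropy import CrossEntropyLoss

logits = [Value(2.0), Value(1.0), Value(0.1)]      # raw model outputs for one sample
loss = CrossEntropyLoss.forward(logits, target=0)  # -log(softmax(logits)[0])
loss.backward()                                    # gradients flow back into every logit
print(loss.data, [l.grad for l in logits])
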
11 changes: 11 additions & 0 deletions micrograd/engine.py
@@ -130,6 +130,17 @@ def _backward():

return out

def log(self) -> 'Value':
"""Logarithm is only defined for positive values."""
clamped_data = max(self.data, 1e-7)
out = Value(math.log(clamped_data), (self,), 'log')

def _backward():
self.grad += (1 / clamped_data) * out.grad

out._backward = _backward
return out

def backward(self) -> None:

# topological order all the children in the graph
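
The new Value.log clamps its input at 1e-7 so a zero or negative value cannot crash math.log, and its backward pass applies d/dx log(x) = 1/x. A quick gradient sanity check (illustrative sketch using the Value API above):

from micrograd.engine import Value

x = Value(2.0)
y = x.log()
y.backward()
print(y.data)  # ~0.6931, i.e. ln(2)
print(x.grad)  # ~0.5, i.e. 1 / x.data
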
5 changes: 3 additions & 2 deletions pyproject.toml
@@ -2,7 +2,7 @@
name = "micrograd"
version = "0.2.0"
authors = [
{ name = "Andrej Karpathy", email = "[email protected]" },

{ name = "Sermet Pekin", email = "[email protected]" },
]
description = "A tiny scalar-valued autograd engine with a small PyTorch-like neural network library on top."
@@ -13,6 +13,7 @@ dependencies = [
"black>=24.10.0",
"graphviz>=0.20.3",
"matplotlib>=3.7.5",
"pandas>=2.2.3",
"pytest>=8.3.4",
"ruff>=0.8.1",
"scikit-learn>=1.3.2",
@@ -23,7 +24,7 @@ dependencies = [

[project.urls]
Source = "https://github.com/SermetPekin/micrograd"
Original = "https://github.com/karpathy/micrograd"
Inspired-by = "https://github.com/karpathy/micrograd"


[build-system]
33 changes: 33 additions & 0 deletions test/test_crossEntropyLoss.py
@@ -0,0 +1,33 @@
import pytest
import torch
import torch.nn as nn
from micrograd.engine import Value
from micrograd.cross_entropy import CrossEntropyLoss  # the custom implementation introduced in this commit

def test_micrograd_vs_torch_cross_entropy():
# Define the logits and targets
logits_micrograd = [[Value(-1.0), Value(-2.0), Value(-3.0)],
[Value(0.5), Value(-1.5), Value(-0.5)]]
targets_micrograd = [0, 2]

# Torch equivalent tensors
logits_torch = torch.tensor([[-1.0, -2.0, -3.0],
[0.5, -1.5, -0.5]], dtype=torch.float32)
targets_torch = torch.tensor([0, 2], dtype=torch.long)

# Compute micrograd loss
loss_micrograd = CrossEntropyLoss.batch_forward(logits_micrograd, targets_micrograd)
micrograd_loss_value = loss_micrograd.data

# Compute torch loss
criterion = nn.CrossEntropyLoss()
loss_torch = criterion(logits_torch, targets_torch)
torch_loss_value = loss_torch.item()

# Print losses for debugging
print(f"Micrograd Loss: {micrograd_loss_value:.4f}")
print(f"Torch Loss: {torch_loss_value:.4f}")

# Assert that the losses are approximately equal
assert abs(micrograd_loss_value - torch_loss_value) < 1e-4, \
f"Losses do not match: Micrograd={micrograd_loss_value}, Torch={torch_loss_value}"
197 changes: 197 additions & 0 deletions torch_e2.py
@@ -0,0 +1,197 @@
import torch
import torch.nn as nn
import torch.nn.functional as F
from sklearn.model_selection import train_test_split
import pandas as pd
import numpy as np

import matplotlib.pyplot as plt
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor


def init_data():
def fnc(d: str):
dict_ = {
'Setosa': 0,
'Versicolor': 1,
'Virginica': 2,

}
return dict_.get(d, d)

url = "https://gist.githubusercontent.com/netj/8836201/raw/6f9306ad21398ea43cba4f7d537619d0e07d5ae3/iris.csv"
df = pd.read_csv(url)
df['variety'] = df['variety'].apply(fnc)
return df


def process_data_for_torch(df):
X = df.drop('variety', axis=1)
y = df['variety']
X = X.values
y = y.values

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2)
X_train = torch.FloatTensor(X_train)
X_test = torch.FloatTensor(X_test)
y_train = torch.LongTensor(y_train)
y_test = torch.LongTensor(y_test)
return X_train, X_test, y_train, y_test


class Model(nn.Module):
def __init__(self, in_feats: int = 4, out_feats: int = 3, hidden1=7, hidden2=7):
super(Model, self).__init__()
self.fc1 = nn.Linear(in_feats, hidden1)
self.fc2 = nn.Linear(hidden1, hidden2)
self.out = nn.Linear(hidden2, out_feats)

def forward(self, x):
x = F.relu(self.fc1(x))
x = F.relu(self.fc2(x))
x = self.out(x)
return x


def with_torch(save=True):
df = init_data()
X_train, X_test, y_train, y_test = process_data_for_torch(df)

model = Model()

criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=0.01)

# model.parameters
epochs = 60
losses = []
for i in range(epochs):
y_pred = model.forward(X_train)
loss = criterion(y_pred, y_train)

losses.append(loss.detach().numpy())

if i % 10 == 0:
print(f'Epoch : {i} and loss : {loss}')

# backprop

optimizer.zero_grad()
loss.backward()
optimizer.step()

if save:
# Save the model
torch.save(model.state_dict(), "iris_model.pth")

with torch.no_grad():
y_eval = model.forward(X_test)
loss = criterion(y_eval, y_test).item()
accuracy = calculate_accuracy(y_eval, y_test)

print(f"Test Loss: {loss}")
print(f"Test Accuracy: {accuracy * 100:.2f}%")
return model, losses


def load_model():
# Load the model
loaded_model = Model()
loaded_model.load_state_dict(torch.load("iris_model.pth"))
loaded_model.eval()
return loaded_model


def calculate_accuracy(_y_pred, _y_true):
y_pred_classes = torch.argmax(_y_pred, axis=1) # Get the predicted class
acc = (y_pred_classes == _y_true).sum().item() / len(_y_true)
return acc


# with_torch()

def with_micrograd():
from micrograd import Value
from micrograd import MLP

# Define the Micrograd model
in_feats = 4 # Input features (Iris dataset)
hidden1 = 7 # Hidden layer 1
hidden2 = 7 # Hidden layer 2
out_feats = 3 # Output classes

model = MLP(in_feats, [hidden1, hidden2, out_feats])  # Equivalent to the PyTorch Model defined above
df = init_data()
X_train, X_test, y_train, y_test = process_data_for_torch(df)
# Hyperparameters
learning_rate = 0.01
epochs = 100
losses = []

# One-hot encode y_train
y_train_onehot = np.zeros((y_train.shape[0], out_feats))  # Initialize with zeros
y_train_onehot[np.arange(y_train.shape[0]), y_train] = 1  # Set the true class index to 1


# Training loop
for epoch in range(epochs):
epoch_loss = 0.0

for i in range(len(X_train)):
# Forward pass
inputs = [Value(x) for x in X_train[i]]
targets = [Value(y) for y in y_train_onehot[i]]
outputs = model(inputs)

# Calculate Cross-Entropy Loss
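# (this mirrors CrossEntropyLoss.forward from micrograd/cross_entropy.py, written out with one-hot targets)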
exp_outputs = [o.exp() for o in outputs]
sum_exp_outputs = sum(exp_outputs)
probs = [o / sum_exp_outputs for o in exp_outputs]
loss = -sum(t * p.log() for t, p in zip(targets, probs))

epoch_loss += loss.data

# Backpropagation
model.zero_grad() # Zero gradients
loss.backward()

# Update weights
for param in model.parameters():
param.data -= learning_rate * param.grad

losses.append(epoch_loss / len(X_train))
if epoch % 10 == 0:
print(f"Epoch {epoch}, Loss: {epoch_loss / len(X_train)}")


# Evaluation
correct = 0
total = len(X_test)

for i in range(len(X_test)):
inputs = [Value(x) for x in X_test[i]]
outputs = model(inputs)
predicted = np.argmax([o.data for o in outputs])
if predicted == y_test[i]:
correct += 1

accuracy = correct / total
print(f"Test Accuracy: {accuracy * 100:.2f}%")


# Plot the losses
plt.plot(range(epochs), losses, label='Micrograd Loss')
plt.xlabel('Epochs')
plt.ylabel('Loss')
plt.title('Micrograd Loss Curve')
plt.legend()
plt.show()


with_micrograd()
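
As committed, the script runs only the micrograd path (with_torch() is commented out above and with_micrograd() runs at import time). A possible entry point, sketched on the assumption that both functions keep their current signatures, would replace the bare call:

if __name__ == "__main__":
    with_torch(save=False)   # PyTorch baseline on the same Iris data
    with_micrograd()         # micrograd version for comparison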
