forked from karpathy/micrograd
Commit ebe17fd (1 parent: c865e31)
Showing 4 changed files with 180 additions and 6 deletions.
@@ -0,0 +1,105 @@
import pytest
import torch
from micrograd import Value, MLP, Optimizer, Trainer, OptimizerForComparison


# PyTorch reference model: a 2 -> 3 -> 1 MLP with ReLU, mirroring the
# micrograd MLP built in the test below.
class TorchMLP(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(2, 3)
        self.fc2 = torch.nn.Linear(3, 1)

    def forward(self, x):
        x = self.fc1(x)
        x = torch.relu(x)
        x = self.fc2(x)
        return x


# Loss function for micrograd
def mean_squared_error(predicted: Value, target: Value) -> Value:
    return (predicted - target) ** 2

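# Worked example (assuming the fork's Value mirrors karpathy/micrograd's):
# mean_squared_error(Value(3.0), Value(5.0)) returns a Value holding
# (3.0 - 5.0) ** 2 == 4.0, i.e. a per-sample squared error. Note that
# torch.nn.MSELoss used below averages over the batch by default
# (reduction='mean'); whether Trainer applies a matching reduction is not
# shown in this diff.

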
def initialize_weights_micrograd(model):
    for layer in model.layers:
        for neuron in layer.neurons:
            for weight in neuron.weights:
                weight.data = 0.5  # Example fixed value
            neuron.bias.data = 0.1  # Example fixed value


def initialize_weights_torch(model):
    with torch.no_grad():
        model.fc1.weight.fill_(0.5)
        model.fc1.bias.fill_(0.1)
        model.fc2.weight.fill_(0.5)
        model.fc2.bias.fill_(0.1)


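# NOTE: For a like-for-like comparison, both models should start from
# identical parameters; the two helpers above exist for exactly that.
# Their calls are currently commented out in the test below, which is
# presumably why the test is still marked as skipped:
#
#     initialize_weights_micrograd(micrograd_model)
#     initialize_weights_torch(torch_model)

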
def data1():
    # Four points from y = 2*x1 + 3*x2 + 1 (e.g. 2*1 + 3*2 + 1 == 9)
    inputs = [
        [Value(1.0), Value(2.0)],
        [Value(2.0), Value(3.0)],
        [Value(3.0), Value(4.0)],
        [Value(4.0), Value(5.0)]
    ]
    targets = [Value(9.0), Value(14.0), Value(19.0), Value(24.0)]

    torch_inputs = torch.tensor([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0]])
    torch_targets = torch.tensor([[9.0], [14.0], [19.0], [24.0]])
    return inputs, targets, torch_inputs, torch_targets


def data2():
    # Twenty points from the same rule, y = 2*x1 + 3*x2 + 1
    # (not yet used by the test below)
    inputs = [[Value(i), Value(i + 1)] for i in range(1, 21)]
    targets = [Value(2 * i + 3 * (i + 1) + 1) for i in range(1, 21)]
    torch_inputs = torch.tensor([[i, i + 1] for i in range(1, 21)], dtype=torch.float32)
    torch_targets = torch.tensor([[2 * i + 3 * (i + 1) + 1] for i in range(1, 21)], dtype=torch.float32)
    return inputs, targets, torch_inputs, torch_targets


@pytest.mark.skip(reason='TODO')
def test_micrograd_vs_torch():
    # Dataset
    inputs, targets, torch_inputs, torch_targets = data1()

    # Micrograd model
    micrograd_model = MLP(input_size=2, layer_sizes=[3, 1])
    micrograd_optimizer = OptimizerForComparison()
    micrograd_trainer = Trainer(
        model=micrograd_model,
        loss_fn=mean_squared_error,
        optimizer=micrograd_optimizer
    )

    # initialize_weights_micrograd(micrograd_model)

    EPOCHS = 100
    # Train the micrograd model
    micrograd_trainer.train(inputs, targets, epochs=EPOCHS, learning_rate=0.01)

    # PyTorch model
    torch_model = TorchMLP()
    # initialize_weights_torch(torch_model)
    torch_optimizer = torch.optim.SGD(torch_model.parameters(), lr=0.01)
    loss_fn = torch.nn.MSELoss()

    # Train the PyTorch model
    for epoch in range(EPOCHS):
        torch_optimizer.zero_grad()
        predictions = torch_model(torch_inputs)
        loss = loss_fn(predictions, torch_targets)
        loss.backward()
        torch_optimizer.step()

    # Compare predictions on an unseen input
    micrograd_test_input = [Value(5.0), Value(6.0)]
    micrograd_prediction = micrograd_model(micrograd_test_input).data

    torch_test_input = torch.tensor([[5.0, 6.0]])
    torch_prediction = torch_model(torch_test_input).item()

    # Assert that predictions are close
    assert pytest.approx(micrograd_prediction, rel=1e-2) == torch_prediction, \
        f'micrograd_prediction: {micrograd_prediction} torch_prediction: {torch_prediction}'
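
Once the TODO is addressed and the skip marker removed, the test can be run with a standard pytest invocation. The file name is not shown in this diff, so selection by keyword is the safest option (assuming pytest is installed and run from the repository root):

pytest -k test_micrograd_vs_torch -v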