
Commit

torch implementation of micrograd
SermetPekin committed Dec 5, 2024
1 parent f1e9309 commit 2e6b039
Showing 9 changed files with 138 additions and 26 deletions.
2 changes: 1 addition & 1 deletion example2.py
@@ -1,6 +1,6 @@
# Example: Training a simple MLP
import random
from typing import List, Union
from typing import List

from micrograd.engine import Value
from micrograd.nn import MLP
24 changes: 24 additions & 0 deletions example_torch_micrograd.py
@@ -0,0 +1,24 @@
import torch
import torch.nn as nn
# import torch.nn.functional as F

from micrograd.torch_micrograd import train, Optimizer, MLP

# Example Usage
if __name__ == "__main__":
# Create a dataset
inputs = torch.tensor([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0]])
targets = torch.tensor([[9.0], [14.0], [19.0], [24.0]])

# Define the model, loss, and optimizer
model = MLP(input_size=2, layer_sizes=[3, 1])
loss_fn = nn.MSELoss()
optimizer = Optimizer(model.parameters(), learning_rate=0.01)

# Train the model
train(model, inputs, targets, loss_fn, optimizer, epochs=100)

# Test the model
test_input = torch.tensor([[5.0, 6.0]])
prediction = model(test_input)
print(f"Prediction for input {test_input.tolist()}: {prediction.tolist()}")
3 changes: 1 addition & 2 deletions example_trainer2.py
@@ -1,6 +1,5 @@
import pytest
import torch
from micrograd import Value, MLP, Optimizer, Trainer, OptimizerForComparison, TrainerForComparison
from micrograd import Value, MLP, OptimizerForComparison, TrainerForComparison


class TorchMLP(torch.nn.Module):
3 changes: 0 additions & 3 deletions micrograd/activation_functions.py
@@ -7,7 +7,6 @@
class Activation:
@staticmethod
def relu(value: "Value") -> "Value":
from .engine import Value
return value.relu()

@staticmethod
@@ -16,10 +15,8 @@ def linear(value: "Value") -> "Value":

@staticmethod
def sigmoid(value: "Value") -> "Value":
from .engine import Value
return value.sigmoid()

@staticmethod
def tanh(value: "Value") -> "Value":
from .engine import Value
return value.tanh()
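The deleted from .engine import Value lines were unused: each static method only delegates to the corresponding method of the value it receives, so nothing from engine is referenced at runtime. A minimal usage sketch, assuming the Value API from micrograd/engine.py:

from micrograd.engine import Value
from micrograd.activation_functions import Activation

v = Value(-2.0)
print(Activation.relu(v))   # delegates to v.relu()  -> data becomes 0.0
print(Activation.tanh(v))   # delegates to v.tanh()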
13 changes: 11 additions & 2 deletions micrograd/engine.py
@@ -2,7 +2,7 @@
from typing import Callable, Set, Union

Number = Union[int, float]
from abc import ABC, abstractmethod
from abc import ABC


class ValueMagics(ABC):
@@ -32,6 +32,11 @@ def __rtruediv__(self, other) -> "Value": # other / self
def __repr__(self) -> str:
return f"Value(data={self.data}, grad={self.grad}, op={self._op})"

def __format__(self, format_spec):
if not format_spec:
return f"Value(data={self.data}, grad={self.grad})"
return f"Value(data={self.data:{format_spec}}, grad={self.grad:{format_spec}})"

def __add__(self, other: Number | "Value") -> "Value":
other = other if isinstance(other, Value) else Value(other)
out = Value(self.data + other.data, (self, other), "+")
@@ -146,7 +151,11 @@ def build_topo(v):
v._backward()

def exp(self) -> "Value":
out = Value(math.exp(self.data), (self,), "exp")
v = self.data
if isinstance(v, Value):
v = v.data
v = max(min(v, 700), -700)
out = Value(math.exp(v), (self,), "exp")

def _backward():
self.grad += out.data * out.grad
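Two changes land in Value here: a __format__ hook so instances accept format specs directly in f-strings, and a clamp in exp() that keeps the argument inside [-700, 700] (math.exp raises OverflowError for inputs above roughly 709). A minimal sketch, assuming Value(data) starts with grad equal to 0:

import math
from micrograd.engine import Value

v = Value(3.14159)
print(f"{v:.2f}")   # via Value.__format__ -> Value(data=3.14, grad=0.00)
print(f"{v}")       # empty format spec -> plain data/grad string

big = Value(1000.0)
# Without the clamp this would overflow; with it, exp() saturates at exp(700).
print(big.exp().data == math.exp(700))   # True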
21 changes: 12 additions & 9 deletions micrograd/nn.py
@@ -1,6 +1,6 @@
import random
from typing import List, Callable, Optional
from abc import ABC, abstractmethod
from abc import ABC

from micrograd.engine import Value, Weight, Bias

@@ -15,7 +15,6 @@ def parameters(self) -> List[Value]:
return []



class Neuron(Module):

def __init__(
@@ -83,18 +82,22 @@ def __repr__(self) -> str:
return f"Layer of [{', '.join(str(neuron) for neuron in self.neurons)}]"


class MLP(Module):
from .activation_functions import Activation

def __init__(self, input_size: int, layer_sizes: List[int]):
from .activation_functions import Activation

class MLP(Module):

def __init__(self, input_size: int, layer_sizes: List[int],
activation_function_hidden=Activation.relu,
activation_function_output=Activation.linear,
):
sizes: List[int] = [input_size] + layer_sizes
self.layers: List[Layer] = [
Layer(
sizes[i],
sizes[i + 1],
activation_function=(
Activation.relu if i != len(layer_sizes) - 1 else Activation.linear
activation_function_hidden if i != len(layer_sizes) - 1 else activation_function_output
),
)
for i in range(len(layer_sizes))
@@ -198,8 +201,7 @@ def train(

# Zero gradients for the next iteration
self.model.zero_grad()

print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / len(inputs):.4f}")
# print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss / {len(inputs)} :.4f}")


class TrainerForComparison:
@@ -237,7 +239,8 @@ def train(
print(f"After {self.eval_interval} epochs, best model selected.")
else:
# Train only the best model
self._train_one_epoch( self.best_model.number, self.best_model, self.optimizers[0], inputs, targets, learning_rate)
self._train_one_epoch(self.best_model.number, self.best_model, self.optimizers[0], inputs, targets,
learning_rate)

def _train_one_epoch(
self,
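The MLP constructor now takes its activation functions as keyword arguments instead of hard-coding ReLU for hidden layers and a linear output; the defaults preserve the old behaviour. A short sketch of the new signature, mirroring the updated test/test_trainer.py below:

from micrograd.nn import MLP
from micrograd.activation_functions import Activation

# Defaults: ReLU hidden layers, linear output (same as before this commit).
default_model = MLP(input_size=2, layer_sizes=[3, 1])

# Custom activations via the new keyword arguments.
custom_model = MLP(
    input_size=2,
    layer_sizes=[3, 1],
    activation_function_hidden=Activation.sigmoid,
    activation_function_output=Activation.tanh,
)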
71 changes: 71 additions & 0 deletions micrograd/torch_micrograd.py
@@ -0,0 +1,71 @@
import torch
import torch.nn as nn
import torch.nn.functional as F


class Value:
def __init__(self, data):
self.data = torch.tensor(data, dtype=torch.float32, requires_grad=True)

def __repr__(self):
return f"Value(data={self.data.item()}, grad={self.data.grad})"


class MLP(nn.Module):
def __init__(self, input_size, layer_sizes):
super().__init__()
layers = []
sizes = [input_size] + layer_sizes
for i in range(len(sizes) - 1):
layers.append(nn.Linear(sizes[i], sizes[i + 1]))
self.layers = nn.ModuleList(layers)

def forward(self, x):
for i, layer in enumerate(self.layers):
x = layer(x)
if i < len(self.layers) - 1: # Apply activation for all but the last layer
x = F.relu(x)
return x


class Optimizer:
def __init__(self, parameters, learning_rate=0.01):
self.optimizer = torch.optim.SGD(parameters, lr=learning_rate)

def step(self):
self.optimizer.step()

def zero_grad(self):
self.optimizer.zero_grad()


def train(model, inputs, targets, loss_fn, optimizer, epochs=100):
for epoch in range(epochs):
total_loss = 0.0

# Forward pass
predictions = model(inputs)
loss = loss_fn(predictions, targets)
total_loss += loss.item()

# Backward pass
optimizer.zero_grad()
loss.backward()
optimizer.step()

print(f"Epoch {epoch + 1}/{epochs}, Loss: {total_loss:.4f}")


if __name__ == "__main__":
inputs_ = torch.tensor([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0]])
targets_ = torch.tensor([[9.0], [14.0], [19.0], [24.0]])

model_ = MLP(input_size=2, layer_sizes=[3, 1])
loss_fn_ = nn.MSELoss()
optimizer_ = Optimizer(model_.parameters(), learning_rate=0.01)

train(model_, inputs_, targets_, loss_fn_, optimizer_, epochs=100)

test_input = torch.tensor([[5.0, 6.0]])
prediction = model_(test_input)
print(f"Prediction for input {test_input.tolist()}: {prediction.tolist()}")
25 changes: 17 additions & 8 deletions test/test_trainer.py
@@ -1,6 +1,9 @@
from micrograd import Activation, Value

from micrograd import Value, MLP, Optimizer , Trainer , OptimizerForComparison
from micrograd import MLP, Trainer, OptimizerForComparison

import torch


# Dataset
inputs = [
@@ -23,12 +26,8 @@ def mean_squared_error(predicted: Value, target: Value) -> Value:



import torch
from micrograd.engine import Value


def test_sanity_check_with_trainer():

x = Value(-4.0)
z = 2 * x + 2 + x
q = z.relu() + z * x
@@ -51,11 +50,22 @@ def test_sanity_check_with_trainer():
# backward pass went well
assert xmg.grad == xpt.grad.item()


def test_complete_train(capsys):
with capsys.disabled():
from micrograd.activation_functions import Activation

# mlp = MLP(
# input_size=2,
# layer_sizes=[3, 1],
# activation_function_hidden=Activation.sigmoid,
# activation_function_output=Activation.tanh,
# )

# Model
model = MLP(input_size=2, layer_sizes=[3, 1])
model = MLP(input_size=2, layer_sizes=[3, 1],
activation_function_hidden=Activation.sigmoid ,
activation_function_output=Activation.linear)

# Optimizer
optimizer = OptimizerForComparison()
@@ -69,8 +79,7 @@ def test_complete_train(capsys):
# Test
test_input = [Value(5.0), Value(6.0)] # Expected output: 31
prediction = model(test_input)
print(f"Prediction for input {test_input}: {prediction.data:.4f}")

print(f"Prediction for input {len(test_input)}: {prediction.data:.4f}")


def test_relu():
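To run only the updated trainer tests, a typical pytest invocation (the test path comes from the file list above; the -k filter is an assumption about how you might narrow the run):

import pytest

# Equivalent to `pytest test/test_trainer.py -k test_complete_train` on the command line.
pytest.main(["test/test_trainer.py", "-k", "test_complete_train"])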
2 changes: 1 addition & 1 deletion test/test_trainer2.py
@@ -1,6 +1,6 @@
import pytest
import torch
from micrograd import Value, MLP, Optimizer, Trainer, OptimizerForComparison, TrainerForComparison
from micrograd import Value, MLP, OptimizerForComparison, TrainerForComparison


class TorchMLP(torch.nn.Module):
