example_trainer2.py (forked from karpathy/micrograd)
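"""Compare a micrograd MLP against an equivalent PyTorch model.

Both models are trained on the same small linear dataset
(y = 2*x1 + 3*x2 + 1), then their predictions on an unseen input
are printed side by side.
"""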
import torch

from micrograd import Value, MLP, OptimizerForComparison, TrainerForComparison


class TorchMLP(torch.nn.Module):
    """PyTorch model with the same shape as the micrograd MLP: Linear(2, 3) -> ReLU -> Linear(3, 1)."""

    def __init__(self):
        super().__init__()
        self.fc1 = torch.nn.Linear(2, 3)
        self.fc2 = torch.nn.Linear(3, 1)

    def forward(self, x):
        x = self.fc1(x)
        x = torch.relu(x)
        x = self.fc2(x)
        return x

# Squared-error loss for a single sample (micrograd Value version)
def mean_squared_error(predicted: Value, target: Value) -> Value:
    return (predicted - target) ** 2


def initialize_weights_micrograd(model):
    # Deterministic initialization, useful for reproducible comparisons
    for layer in model.layers:
        for neuron in layer.neurons:
            for weight in neuron.weights:
                weight.data = 0.5  # Example fixed value
            neuron.bias.data = 0.1  # Example fixed value

def initialize_weights_torch(model):
    # Mirror the fixed micrograd initialization on the PyTorch side
    with torch.no_grad():
        model.fc1.weight.fill_(0.5)
        model.fc1.bias.fill_(0.1)
        model.fc2.weight.fill_(0.5)
        model.fc2.bias.fill_(0.1)

def data1():
    # Four points sampled from y = 2*x1 + 3*x2 + 1
    inputs = [
        [Value(1.0), Value(2.0)],
        [Value(2.0), Value(3.0)],
        [Value(3.0), Value(4.0)],
        [Value(4.0), Value(5.0)],
    ]
    targets = [Value(9.0), Value(14.0), Value(19.0), Value(24.0)]
    torch_inputs = torch.tensor([[1.0, 2.0], [2.0, 3.0], [3.0, 4.0], [4.0, 5.0]])
    torch_targets = torch.tensor([[9.0], [14.0], [19.0], [24.0]])
    return inputs, targets, torch_inputs, torch_targets

def data2():
    # Larger dataset from the same relation (defined but not used below)
    inputs = [[Value(i), Value(i + 1)] for i in range(1, 21)]
    targets = [Value(2 * i + 3 * (i + 1) + 1) for i in range(1, 21)]
    torch_inputs = torch.tensor([[i, i + 1] for i in range(1, 21)], dtype=torch.float32)
    torch_targets = torch.tensor([[2 * i + 3 * (i + 1) + 1] for i in range(1, 21)], dtype=torch.float32)
    return inputs, targets, torch_inputs, torch_targets

# @pytest.mark.skipif(True, reason='TODO')
def compare_micrograd_vs_torch():
    # Dataset
    inputs, targets, torch_inputs, torch_targets = data1()

    # Micrograd model
    micrograd_model = MLP(input_size=2, layer_sizes=[3, 1])
    micrograd_optimizer = OptimizerForComparison()
    micrograd_trainer = TrainerForComparison(
        model=micrograd_model,
        loss_fn=mean_squared_error,
        optimizer=micrograd_optimizer,
        num_clones=5,
    )
    # initialize_weights_micrograd(micrograd_model)

    EPOCHS = 10000

    # Train micrograd model
    micrograd_trainer.train(inputs, targets, epochs=EPOCHS, learning_rate=0.01)

    # PyTorch model
    torch_model = TorchMLP()
    # initialize_weights_torch(torch_model)
    torch_optimizer = torch.optim.SGD(torch_model.parameters(), lr=0.01)
    loss_fn = torch.nn.MSELoss()

    # Train PyTorch model
    for epoch in range(EPOCHS):
        torch_optimizer.zero_grad()
        predictions = torch_model(torch_inputs)
        loss = loss_fn(predictions, torch_targets)
        loss.backward()
        torch_optimizer.step()

    # Compare predictions on an unseen input
    micrograd_test_input = [Value(5.0), Value(6.0)]
    micrograd_prediction = micrograd_model(micrograd_test_input).data
    torch_test_input = torch.tensor([[5.0, 6.0]])
    torch_prediction = torch_model(torch_test_input).item()
    msg = f'micrograd_prediction: {micrograd_prediction} torch_prediction: {torch_prediction}'
    print(msg)

    # Assert that predictions are close (requires `import pytest`)
    # assert pytest.approx(micrograd_prediction, rel=1e-2) == torch_prediction, msg

if __name__ == '__main__':
    compare_micrograd_vs_torch()