[SYSTEMDS-3689] Complete ResNet Integration
These commits add the full integration of ResNets. The forward passes for the basic residual block architecture (ResNet18 & 34) were added previously. These commits add the forward passes for the bottleneck architecture (ResNet50, 101 & 152) and the backward passes for all architectures. Additionally, since iterating through the model parameters gets quite complex, especially for the bigger models, I implemented utility functions for updating the parameters with different optimizers. For each model and each optimizer, a utility method to initialize the optimizer state parameters and one to update the model parameters have been added. There is also a new example that uses random data to showcase the full training loop of the ResNets (forward, backward, parameter updating). The example uses ResNet18 but can be switched to another model by simply modifying the import statement. It uses the ADAM optimizer, but all the other optimizers are included in the example as well and can be used by uncommenting the respective lines. The example was tested for all ResNets (18, 34, 50, 101, 152) and for all optimizers; every combination trains correctly (i.e. the loss shrinks). Regarding testing, I've added comprehensive tests for the forward and backward passes of the residual blocks (basic and bottleneck architectures) and for the residual layer's forward pass (multiple residual blocks), but not for the residual layer's backward pass and the full networks, because the example data would take up too much space. Closes #1992 Closes #2010
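As the message notes, the example can be pointed at another model by changing only the import statement. A minimal sketch of that swap for ResNet50 follows; it assumes resnet50.dml sits next to resnet18.dml and exposes the same init/forward/backward and optimizer-utility interface, which this commit view does not show explicitly.

# hypothetical swap of the example from ResNet18 to ResNet50:
# only the source line and the alias change, the rest of the script stays the same
source("scripts/nn/networks/resnet50.dml") as resnet50

classes = 1000
[model, ema_means_vars] = resnet50::init(classes, -1)
optimizer_params = resnet50::init_adam_optim_params(classes)
# ... the forward/backward/update calls below then use resnet50:: instead of resnet18::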
1 parent 14e2995, commit 505f871. Showing 11 changed files with 6,053 additions and 139 deletions.
@@ -0,0 +1,158 @@
#-------------------------------------------------------------
#
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.
#
#-------------------------------------------------------------

#-------------------------------------------------------------
# This is a simple example for the use of the ResNets. Here,
# ResNet18 is used. Random data is used to only showcase the
# usage of the ResNets and their implemented utility functions.
# There are utility functions for different optimizers. To
# switch the used optimizer, comment and uncomment the
# corresponding lines marked with '***' in the initialization
# and training sections.
#-------------------------------------------------------------

source("scripts/nn/networks/resnet_util.dml") as resnet_util | ||
source("scripts/nn/networks/resnet18.dml") as resnet18 | ||
source("scripts/nn/layers/softmax.dml") as softmax | ||
source("scripts/nn/layers/cross_entropy_loss.dml") as cross_entropy | ||
source("scripts/nn/layers/logcosh_loss.dml") as logcosh | ||
source("nn/optim/adam.dml") as adam | ||
|
||
# model
classes = 1000

# get initial model parameters
[model, ema_means_vars] = resnet18::init(classes, -1)

# get initial optimizer parameters
# choose your optimizer:
# *** adagrad
# optimizer_params = resnet18::init_adagrad_optim_params(classes)
# *** adam
optimizer_params = resnet18::init_adam_optim_params(classes)
# *** rmsprop
# optimizer_params = resnet18::init_rmsprop_optim_params(classes)
# *** sgd
# optimizer_params = list()
# *** sgd momentum
# optimizer_params = resnet18::init_sgd_momentumg_optim_params(classes)
# *** sgd nesterov
# optimizer_params = resnet18::init_sgd_nesterov_optim_params(classes)

# create random data
N = 100
Hin = 32
Win = 32
C = 3  # input channels
X = rand(rows=N, cols=Hin*Win*C)
Y = rand(rows=N, cols=classes, min=0, max=1, pdf="normal")

# train ResNet
epochs = 20
batch_size = 16

[learned_model, learned_emas] = train(X, Y, model, ema_means_vars, N, Hin, Win, epochs, batch_size, optimizer_params)


train = function(matrix[double] X, matrix[double] Y, list[unknown] model, list[unknown] emas, int samples, int Hin,
    int Win, int epochs, int batch_size, list[unknown] optim_params)
    return (list[unknown] learned_model, list[unknown] learned_emas) {
  /*
   * Trains the model and returns the learned parameters.
   */
  # optimizer hyperparameters
  lr = 0.001
  epsilon = 1e-8
  # *** adam
  beta1 = 0.9
  beta2 = 0.999
  t = 0
  # *** rmsprop
  decay_rate = 0.99
  # *** sgd momentum & nesterov
  momentum = 0.8

  learned_model = list()
  learned_emas = list()

  iterations = ceil(samples/batch_size)
  mode = "train"

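  # mini-batch training loop: for every epoch, iterate over the batches once
  # and reshuffle the rows of X and Y before the next epoch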
  for (epoch in 1:epochs) {
    loss_avg = 0.0

    print("Start epoch: " + epoch)

    for (i in 1:iterations) {
      print(" - Iteration: " + i)

      # get batch
      start = (i - 1) * batch_size + 1
      end = min(samples, i * batch_size)
      X_batch = X[start:end,]
      Y_batch = Y[start:end,]

      # forward pass (also returns cached intermediate outputs and batch-norm
      # statistics that are consumed by the backward pass)
      [out, emas, cached_out, cached_means_vars] = resnet18::forward(X_batch, Hin, Win, model, mode, emas)

      # loss (running average over the batches of this epoch)
      loss = logcosh::forward(out, Y_batch)
      loss_avg = (loss_avg * (i - 1) + loss) / i

      # backward
      dOut = logcosh::backward(out, Y_batch)
      [dX, gradients] = resnet18::backward(dOut, cached_out, model, cached_means_vars)

      # update parameters
      # choose your optimizer
      # *** adagrad
      # [model, optim_params] = resnet18::update_params_with_adagrad(model, gradients, lr, epsilon, optim_params)
      # *** adam
      [model, optim_params] = resnet18::update_params_with_adam(model, gradients, lr, beta1, beta2, epsilon,
          t, optim_params)
      t = t + 1
      # *** rmsprop
      # [model, optim_params] = resnet18::update_params_with_rmsprop(model, gradients, lr, decay_rate, epsilon,
      #     optim_params)
      # *** sgd
      # [model, optim_params] = resnet18::update_params_with_sgd(model, gradients, lr)
      # *** sgd momentum
      # [model, optim_params] = resnet18::update_params_with_sgd_momentum(model, gradients, lr, momentum,
      #     optim_params)
      # *** sgd nesterov
      # [model, optim_params] = resnet18::update_params_with_sgd_nesterov(model, gradients, lr, momentum,
      #     optim_params)
    }

    # reshuffle mini batches: sort X and Y by the same random key column so the
    # row order is permuted while features and labels stay aligned
    r = rand(rows=nrow(Y), cols=1, min=0, max=1, pdf="uniform")
    X_tmp = order(target=cbind(r, X), by=1)
    Y_tmp = order(target=cbind(r, Y), by=1)
    X = X_tmp[,2:ncol(X_tmp)]
    Y = Y_tmp[,2:ncol(Y_tmp)]

    print("Epoch Avg. Loss: " + loss_avg)
  }

  learned_model = model
  learned_emas = emas
}
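For completeness, a rough sketch of how the learned parameters might be used for inference after training. This is an assumption not shown in the commit: it presumes the forward pass accepts mode "test" (as the batch-norm layers in scripts/nn do), so that the learned EMA statistics are used instead of batch statistics, and it reuses the softmax import already present in the example.

# hypothetical inference sketch (not part of the commit): run a forward pass in
# "test" mode with the learned parameters and EMA statistics, then apply softmax
[scores, test_emas, c_out, c_mv] = resnet18::forward(X, Hin, Win, learned_model, "test", learned_emas)
probs = softmax::forward(scores)
predictions = rowIndexMax(probs)  # predicted class index per row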