Commit 477b19a

Tried making more RAM friendly, is now broken.
YafahEdelman committed Jun 3, 2015
1 parent 716837a commit 477b19a
Showing 3 changed files with 73 additions and 5 deletions.
24 changes: 20 additions & 4 deletions train.lua
@@ -20,6 +20,7 @@ require 'optim'
require 'lfs'

require 'util.OneHot'
require 'util.Embedding'
require 'util.misc'

local model_utils = require 'util.model_utils'
@@ -87,9 +88,19 @@ if not path.exists(opt.checkpoint_dir) then lfs.mkdir(opt.checkpoint_dir) end

-- define the model: prototypes for one timestep, then clone them in time
protos = {}
-protos.embed = OneHot(vocab_size)
-local embeded_size = 100
+local input_size, embeded_size
+if opt.words then
+print('using an embedding transform for input...')
+embeded_size = 100
+protos.embed = Embedding(vocab_size, embeded_size)
+else
+print('using one-hot for input...')
+embeded_size = vocab_size
+protos.embed = OneHot(vocab_size)
+end
print('creating an LSTM with ' .. opt.num_layers .. ' layers')
-protos.rnn = LSTM.lstm(vocab_size, opt.rnn_size, opt.num_layers, opt.dropout)
+protos.rnn = LSTM.lstm(embeded_size, opt.rnn_size, opt.num_layers, opt.dropout)
-- the initial state of the cell/hidden states
init_state = {}
for L=1,opt.num_layers do
@@ -100,7 +111,7 @@ for L=1,opt.num_layers do
end
state_predict_index = #init_state -- index of blob to make prediction from
-- classifier on top
-protos.softmax = nn.Sequential():add(nn.Linear(opt.rnn_size, vocab_size)):add(nn.LogSoftMax())
+protos.softmax = nn.Sequential():add(nn.Linear(opt.rnn_size, embeded_size)):add(nn.LogSoftMax())
-- training criterion (negative log likelihood)
protos.criterion = nn.ClassNLLCriterion()
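
With this change the classifier ends in nn.Linear(opt.rnn_size, embeded_size) instead of mapping back to vocab_size, while protos.criterion still receives vocabulary indices as targets. The following shape-trace is a minimal sketch, not part of the commit, assuming Torch7 with the nn package and the util/Embedding.lua module added further down; the batch, hidden, and vocabulary sizes are made up.

-- Shape-trace sketch (not part of this commit): assumes Torch7 with the nn
-- package and the Embedding module from util/Embedding.lua below; all sizes
-- here are illustrative.
require 'nn'
require 'util.Embedding'

local vocab_size, embeded_size, rnn_size, batch_size = 5000, 100, 128, 4

local embed = Embedding(vocab_size, embeded_size)
local x = torch.Tensor(batch_size):random(1, vocab_size)  -- one timestep of word ids
print(embed:forward(x):size())                             -- batch_size x embeded_size

-- classifier as defined in this commit: rnn_size -> embeded_size
local softmax = nn.Sequential():add(nn.Linear(rnn_size, embeded_size)):add(nn.LogSoftMax())
local h = torch.randn(batch_size, rnn_size)                -- stand-in for the top LSTM state
local predictions = softmax:forward(h)                     -- batch_size x embeded_size columns

-- ClassNLLCriterion gets word ids up to vocab_size as targets; in word mode a
-- target id larger than embeded_size has no matching prediction column, which
-- would be consistent with the commit message calling this change broken.
local criterion = nn.ClassNLLCriterion()
-- criterion:forward(predictions, targets)  -- errors once a target id exceeds embeded_size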

@@ -182,7 +193,10 @@ function feval(x)
rnn_state[t] = clones.rnn[t]:forward{embeddings[t], unpack(rnn_state[t-1])}
-- the following line is needed because nngraph tries to be clever
if type(rnn_state[t]) ~= 'table' then rnn_state[t] = {rnn_state[t]} end

predictions[t] = clones.softmax[t]:forward(rnn_state[t][state_predict_index])

-- predictions should be 200 me thinks
loss = loss + clones.criterion[t]:forward(predictions[t], y[{{}, t}])
end
loss = loss / opt.seq_length
@@ -227,16 +241,18 @@ local optim_state = {learningRate = opt.learning_rate, alpha = opt.decay_rate}
local iterations = opt.max_epochs * loader.ntrain
local iterations_per_epoch = loader.ntrain
local loss0 = nil

for i = 1, iterations do

local epoch = i / loader.ntrain

local timer = torch.Timer()

local _, loss = optim.rmsprop(feval, params, optim_state)
local time = timer:time().real

local train_loss = loss[1] -- the loss is inside a list, pop it
train_losses[i] = train_loss

-- every now and then or on last iteration
if i % opt.eval_val_every == 0 or i == iterations then
-- evaluate loss on validation data
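
The loop above drives training through optim.rmsprop, whose contract is a closure that takes the flat parameter vector and returns the loss together with the gradient; the loss comes back wrapped in a table, which is why train_loss is read as loss[1]. Below is a minimal, self-contained sketch of that contract with a made-up quadratic objective, assuming only the Torch7 optim package; it is not part of the commit.

-- Sketch of the feval/optim.rmsprop contract used above (not part of this
-- commit); the objective here is a toy quadratic.
require 'torch'
require 'optim'

local params = torch.randn(5)
local optim_state = {learningRate = 2e-3, alpha = 0.95}

local function feval(x)
  local loss = 0.5 * x:dot(x)  -- toy objective: 0.5 * ||x||^2
  local grad = x:clone()       -- its gradient with respect to x
  return loss, grad
end

for i = 1, 100 do
  local _, fs = optim.rmsprop(feval, params, optim_state)
  if i % 10 == 0 then print(i, fs[1]) end  -- fs[1] is the loss, matching train_loss = loss[1] above
end
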
53 changes: 53 additions & 0 deletions util/Embedding.lua
@@ -0,0 +1,53 @@
--[[
Copyright 2014 Google Inc. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
]]--

local Embedding, parent = torch.class('Embedding', 'nn.Module')

function Embedding:__init(inputSize, outputSize)
  parent.__init(self)
  self.outputSize = outputSize
  self.weight = torch.Tensor(inputSize, outputSize)
  self.gradWeight = torch.Tensor(inputSize, outputSize)
end

function Embedding:updateOutput(input)
  self.output:resize(input:size(1), self.outputSize)
  for i = 1, input:size(1) do
    self.output[i]:copy(self.weight[input[i]])
  end
  return self.output
end

function Embedding:updateGradInput(input, gradOutput)
  if self.gradInput then
    self.gradInput:resize(input:size())
    return self.gradInput
  end
end

function Embedding:accGradParameters(input, gradOutput, scale)
  scale = scale or 1
  if scale == 0 then
    self.gradWeight:zero()
  end
  for i = 1, input:size(1) do
    local word = input[i]
    self.gradWeight[word]:add(gradOutput[i])
  end
end

-- we do not need to accumulate parameters when sharing
Embedding.sharedAccUpdateGradParameters = Embedding.accUpdateGradParameters
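
For orientation, a minimal usage sketch of the Embedding module defined above, not part of the commit: it assumes Torch7 with nn loaded and this file reachable as util/Embedding.lua, and the sizes and token ids are arbitrary. A 1-D tensor of token ids maps to a (sequence length x outputSize) matrix of rows of weight, and accGradParameters adds each gradOutput row into the corresponding row of gradWeight.

-- Usage sketch (not part of this commit): assumes Torch7 + nn; sizes are arbitrary.
require 'nn'
require 'util.Embedding'

local vocab_size, dim = 10, 4
local embed = Embedding(vocab_size, dim)
embed.weight:uniform(-0.1, 0.1)         -- the module leaves its weights uninitialized

local ids = torch.Tensor{3, 7, 3}       -- a sequence of three token ids
local out = embed:forward(ids)          -- 3 x dim; rows copied from weight[3], weight[7], weight[3]

embed.gradWeight:zero()
embed:accGradParameters(ids, torch.ones(3, dim))  -- row 3 accumulates twice, row 7 once
print(out:size(), embed.gradWeight[3])
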
1 change: 0 additions & 1 deletion util/model_utils.lua
@@ -120,7 +120,6 @@ function model_utils.clone_many_times(net, T)
params = {}
end
end

local paramsNoGrad
if net.parametersNoGrad then
paramsNoGrad = net:parametersNoGrad()
