import time

import numpy
import theano
import theano.tensor as TT

from utils import softmax


class HiddenLayerStandard(object):
def __init__(self, rng, input, n_in, n_out, activation=TT.nnet.sigmoid,
W=None, b=None, name='tmp'):
"""
Typical hidden layer of a MLP: units are fully-connected and have
sigmoidal activation function. Weight matrix W is of shape (n_in,n_out)
and the bias vector b is of shape (n_out,).
NOTE : The nonlinearity used here is tanh
Hidden unit activation is given by: tanh(dot(input,W) + b)
:type rng: numpy.random.RandomState
:param rng: a random number generator used to initialize weights
:type input: theano.tensor.dmatrix
:param input: a symbolic tensor of shape (n_examples, n_in)
:type n_in: int
:param n_in: dimensionality of input
:type n_out: int
:param n_out: number of hidden units
:type activation: theano.Op or function
:param activation: Non linearity to be applied in the hidden
layer
"""
self.input = input
        # `W` is initialized with `W_values`, sampled uniformly from
        # [-sqrt(6. / (n_in + n_out)), sqrt(6. / (n_in + n_out))]
        # when the tanh activation function is used.
        # The output of `uniform` is converted with `asarray` to dtype
        # theano.config.floatX so that the code is runnable on GPU.
        # Note: the optimal initialization of the weights depends on the
        # activation function used (among other things). For example, the
        # results presented in [Xavier10] suggest using initial weights
        # four times larger for sigmoid than for tanh.
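        # If W and b are passed in explicitly (e.g. to reuse or tie
        # parameters created elsewhere), they are used as-is and no new
        # shared variables are created.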
if W is None and b is None:
if activation == theano.tensor.tanh:
W_values = numpy.asarray(rng.uniform(
low=-numpy.sqrt(6. / (n_in + n_out)),
high=numpy.sqrt(6. / (n_in + n_out)),
size=(n_in, n_out)), dtype=theano.config.floatX)
elif activation == theano.tensor.nnet.sigmoid:
W_values = numpy.asarray(4 * rng.uniform(
low=-numpy.sqrt(6. / (n_in + n_out)),
high=numpy.sqrt(6. / (n_in + n_out)),
size=(n_in, n_out)), dtype=theano.config.floatX)
else:
W_values = numpy.asarray(rng.uniform(
low=-numpy.sqrt(6. / (n_in + n_out)),
high=numpy.sqrt(6. / (n_in + n_out)),
size=(n_in, n_out)), dtype=theano.config.floatX)
self.W = theano.shared(value=W_values, name=name+'_W')
b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
self.b = theano.shared(value=b_values, name=name+'_b')
else:
self.W = W
self.b = b
self.output = activation(TT.dot(input, self.W) + self.b)
# parameters of the model
self.params = [self.W, self.b]


class HiddenLayerLW(object):
def __init__(self, rng, input, n_in, n_out, activation=TT.tanh,
W=None, b=None, name='tmp'):
"""
Typical hidden layer of a MLP: units are fully-connected and have
sigmoidal activation function. Weight matrix W is of shape (n_in,n_out)
and the bias vector b is of shape (n_out,).
NOTE : The nonlinearity used here is tanh
Hidden unit activation is given by: tanh(dot(input,W) + b)
:type rng: numpy.random.RandomState
:param rng: a random number generator used to initialize weights
:type input: theano.tensor.dmatrix
:param input: a symbolic tensor of shape (n_examples, n_in)
:type n_in: int
:param n_in: dimensionality of input
:type n_out: int
:param n_out: number of hidden units
:type activation: theano.Op or function
:param activation: Non linearity to be applied in the hidden
layer
"""
self.input = input
if W is None and b is None:
if activation == theano.tensor.tanh:
W_values = numpy.asarray(rng.uniform(
low=-numpy.sqrt(6. / (n_in + n_out)),
high=numpy.sqrt(6. / (n_in + n_out)),
size=(n_in, n_out)), dtype=theano.config.floatX)
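                # Orthogonalize the uniformly-sampled matrix: keep only its
                # singular vectors so that W has orthonormal columns/rows,
                # which tends to keep activations well-scaled across layers.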
                u, s, v = numpy.linalg.svd(W_values, full_matrices=False)
                W_values = numpy.dot(u, v)
b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
elif activation == theano.tensor.nnet.sigmoid:
W_values = numpy.asarray(4 * rng.uniform(
low=-numpy.sqrt(6. / (n_in + n_out)),
high=numpy.sqrt(6. / (n_in + n_out)),
size=(n_in, n_out)), dtype=theano.config.floatX)
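                # Same SVD-based orthogonalization, rescaled by a factor of 4
                # for the sigmoid nonlinearity (matching the convention noted
                # above).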
                u, s, v = numpy.linalg.svd(W_values, full_matrices=False)
                W_values = numpy.dot(u * 4, v)
b_values = -2 * numpy.ones((n_out,), dtype=theano.config.floatX)
            else:
                W_values = numpy.asarray(rng.uniform(
                    low=-numpy.sqrt(6. / (n_in + n_out)),
                    high=numpy.sqrt(6. / (n_in + n_out)),
                    size=(n_in, n_out)), dtype=theano.config.floatX)
                # b_values was left undefined in this branch, which raised a
                # NameError below; default to a zero bias.
                b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
self.W = theano.shared(value=W_values, name=name+'_W')
self.b = theano.shared(value=b_values, name=name+'_b')
else:
self.W = W
self.b = b
self.output = activation(TT.dot(input, self.W) + self.b)
# parameters of the model
self.params = [self.W, self.b]


class SoftmaxLayerStandard(object):
def __init__(self, rng, input, n_in, n_out,
W=None, b=None, name='tmp'):
"""
Typical hidden layer of a MLP: units are fully-connected and have
sigmoidal activation function. Weight matrix W is of shape (n_in,n_out)
and the bias vector b is of shape (n_out,).
NOTE : The nonlinearity used here is tanh
Hidden unit activation is given by: tanh(dot(input,W) + b)
:type rng: numpy.random.RandomState
:param rng: a random number generator used to initialize weights
:type input: theano.tensor.dmatrix
:param input: a symbolic tensor of shape (n_examples, n_in)
:type n_in: int
:param n_in: dimensionality of input
:type n_out: int
:param n_out: number of hidden units
"""
self.input = input
if W is None and b is None:
W_values = numpy.asarray(rng.uniform(
low=-numpy.sqrt(6. / (n_in + n_out)),
high=numpy.sqrt(6. / (n_in + n_out)),
size=(n_in, n_out)), dtype=theano.config.floatX)
self.W = theano.shared(value=W_values, name=name+'_W')
b_values = numpy.zeros((n_out,), dtype=theano.config.floatX)
self.b = theano.shared(value=b_values, name=name+'_b')
else:
self.W = W
self.b = b
self.output = softmax(TT.dot(input, self.W) + self.b)
# parameters of the model
self.params = [self.W, self.b]
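

if __name__ == '__main__':
    # Minimal usage sketch, not part of the original module: stack the layers
    # above into a small MLP and compile a forward pass. The layer sizes and
    # names below are illustrative assumptions, and the softmax output relies
    # on the module's own `softmax` helper imported from utils.
    rng = numpy.random.RandomState(1234)
    x = TT.matrix('x')

    h0 = HiddenLayerStandard(rng, input=x, n_in=784, n_out=256,
                             activation=TT.tanh, name='h0')
    h1 = HiddenLayerLW(rng, input=h0.output, n_in=256, n_out=128,
                       activation=TT.tanh, name='h1')
    out = SoftmaxLayerStandard(rng, input=h1.output, n_in=128, n_out=10,
                               name='softmax')

    # Collect all trainable parameters, e.g. for a gradient-based update rule.
    params = h0.params + h1.params + out.params

    forward = theano.function([x], out.output)
    demo_batch = numpy.zeros((5, 784), dtype=theano.config.floatX)
    print(forward(demo_batch).shape)  # expected: (5, 10)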