# MathBase.jl
using AutoGrad: Param, @diff, value, grad
using LinearAlgebra: I, norm

# Scaled i×i identity matrix.
Identity(i; scale=1) = zeros(i, i) + I * scale

# Activations. Note that this scalar `tanh` shadows `Base.tanh`.
sigm(x) = 1 / (1 + exp(-x))
tanh(x) = (begin exp2x = exp(2 * x); (exp2x - 1) / (exp2x + 1) end)
relu(x) = max(0, x)
softmax(x) = (begin expx = exp.(x); expx ./ sum(expx) end)

# Losses, returned elementwise (reduce with `sum` or `mean` as needed).
cross_entropy(label, prediction) = -(label .* log.(prediction))
binary_cross_entropy(label, prediction) = -(label .* log.(prediction) + (1 .- label) .* log.(1 .- prediction))
mse(label, prediction) = (label - prediction) .^ 2

# Xavier-style uniform init. The dims tuple must be splatted: `rand(size(weight))`
# would pick a random element of the tuple, not fill an array of that shape.
xavier(weight; gain=5/3) = (rand(size(weight)...) .* 2 .- 1) .* gain .* sqrt(6 / sum(size(weight)))
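
# Usage sketch (illustrative values, not part of the file): softmax over raw
# scores, then the summed cross-entropy against a one-hot label.
#   scores = [2.0 1.0 0.1]
#   probs  = softmax(scores)                  # entries sum to 1
#   label  = [1.0 0.0 0.0]
#   loss   = sum(cross_entropy(label, probs))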

# Gated recurrent layer. The input and previous state are concatenated before
# the matmul, so each gate weight stacks an input block (in_size×layer_size)
# on top of a state block (layer_size×layer_size) via vcat.
mutable struct Recurrent
    wk::Param  # keep gate
    # bk::Param
    wf::Param  # forget gate
    # bf::Param
    wn::Param  # candidate state
    # bn::Param
    wo::Param  # output
    # bo::Param
    Recurrent(in_size, layer_size) =
        begin
            wk1 = randn(in_size, layer_size)
            wk2 = randn(layer_size, layer_size)
            # bk = zeros(1, layer_size)
            wf1 = randn(in_size, layer_size)
            wf2 = randn(layer_size, layer_size)
            # bf = zeros(1, layer_size)
            wn1 = randn(in_size, layer_size)
            wn2 = randn(layer_size, layer_size)
            # bn = zeros(1, layer_size)
            wo1 = randn(in_size, layer_size)
            wo2 = randn(layer_size, layer_size)
            # bo = zeros(1, layer_size)
            new(
                Param(vcat(wk1, wk2)),
                # Param(bk),
                Param(vcat(wf1, wf2)),
                # Param(bf),
                Param(vcat(wn1, wn2)),
                # Param(bn),
                Param(vcat(wo1, wo2)),
                # Param(bo),
            )
        end
end

# Forward step: one matmul against the horizontally concatenated gate weights,
# then sliced into the four gates. Returns (new_state, output).
(layer::Recurrent)(state, in) =
    begin
        input = hcat(in, state)
        weight = hcat(layer.wk, layer.wf, layer.wn, layer.wo)
        result = input * weight
        w_length = size(layer.wk, 2)
        keep = sigm.(result[:, 1:w_length])
        forget = sigm.(result[:, w_length+1:2*w_length])
        new_state = tanh.(result[:, 2*w_length+1:3*w_length])
        out = tanh.(result[:, 3*w_length+1:4*w_length])
        state = keep .* new_state + forget .* state
        state, out
    end

zero_state(layer::Recurrent) = zeros(1, size(layer.wk, 2))
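
# Usage sketch (arbitrary sizes): unroll the layer over a short sequence and
# differentiate a scalar of the final output with AutoGrad.
#   layer = Recurrent(4, 8)
#   xs = [randn(1, 4) for _ in 1:5]
#   tape = @diff begin
#       s, out = zero_state(layer), nothing
#       for x in xs
#           s, out = layer(s, x)
#       end
#       sum(abs2, out)
#   end
#   dwk = grad(tape, layer.wk)   # gradient w.r.t. the keep-gate weights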

# Dense layer without bias (the bias is kept commented out, as above).
mutable struct FeedForward
    w::Param
    # b::Param
    FeedForward(in_size, layer_size) = new(
        Param(randn(in_size, layer_size)),
        # Param(zeros(1, layer_size)),
    )
end

(layer::FeedForward)(in) = in * layer.w # .+ layer.b

# Dense layer initialized to the identity matrix, so it starts as a
# pass-through (assumes in_size == layer_size).
mutable struct FeedForward_I
    w::Param
    # b::Param
    FeedForward_I(in_size, layer_size) = new(
        Param(Identity(in_size)),
        # Param(zeros(1, layer_size)),
    )
end

(layer::FeedForward_I)(in) = in * layer.w # .+ layer.b
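
# Usage sketch: an identity-initialized layer is a no-op before any updates.
#   lay = FeedForward_I(3, 3)
#   x = randn(1, 3)
#   lay(x) ≈ x   # true at initialization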

# Propagate an input through a stack of layers: act1 follows every hidden
# layer, act2 follows the last layer (pass act2=nothing for a linear output).
# Keyword arguments do not take part in dispatch, so a separate no-keyword
# prop(model, in) would be overwritten; this one method serves both calls.
prop(model, in; act1=tanh, act2=tanh) =
    begin
        for layer in model[1:end-1]
            in = act1.(layer(in))
        end
        act2 !== nothing ? act2.(model[end](in)) : model[end](in)
    end

# Identity-initialized stacks propagate with no activations at all.
prop(model::Array{FeedForward_I}, in) =
    begin
        for layer in model
            in = layer(in)
        end
        in
    end
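
# Usage sketch (arbitrary sizes): a two-layer stack with a linear output.
#   model = [FeedForward(4, 8), FeedForward(8, 2)]
#   x = randn(1, 4)
#   yhat = prop(model, x; act2=nothing)   # 1×2, no final activation
#   tape = @diff sum(abs2, prop(model, x; act2=nothing))
#   dw1 = grad(tape, model[1].w)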

# Convenience aliases and shape helpers. `expand` prepends a singleton
# dimension to a matrix (a 1×m×n view); `stack` concatenates equal-sized
# matrices along that new leading dimension (shadows Base.stack on Julia ≥ 1.9).
resize(args...) = reshape(args...)
shape(args...) = size(args...)
expand(arg) = view(arg, [CartesianIndex()], :, :)
stack(args) = vcat(expand.(args)...)
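
# Usage sketch: stacking three 2×2 matrices into a 3×2×2 array.
#   ms = [randn(2, 2) for _ in 1:3]
#   size(stack(ms)) == (3, 2, 2)   # true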