simple_nn.h
#pragma once
#include <tuple>
#include <random>
#include <cmath>
#include <algorithm>
#include <execution>
#include <functional>
#include <type_traits>
#include "types_helpers.h"
#include "cm_ctors.h"
#include "matrix2d.h"
///at least 2 sizes must be passed - the input and the output layer;
///any sizes in between are the sizes of hidden layers
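///e.g. SimpleLayeredNN<float, 3, 5, 2> is a net with 3 inputs, one hidden
///layer of 5 nodes and 2 outputs (the sizes here are purely illustrative)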
template <class Float, size_t ...Args>
class SimpleLayeredNN
{
public:
static constexpr size_t layers_count = sizeof...(Args);
template<size_t R, size_t C>
using WeightsMatrixT = Matrix2D<Float, R, C>;
constexpr static size_t inputs_count = thelpers::first_v<Args...>();
constexpr static size_t outputs_count = thelpers::last_v<Args...>();
private:
static_assert(std::is_floating_point<Float>::value, "Expecting floating point type only.");
static_assert(layers_count > 1, "Expecting at least 2 additional template parameters.");
//builds the tuple of weight matrices recursively out of the template sizes
template <size_t index, class Tuple>
static auto make_weights_recur(Tuple&& src) noexcept
{
static constexpr bool keep_recurse = index < layers_count - 1u;
static constexpr auto src_tuple = std::make_tuple(Args...);
static constexpr auto size_frst = thelpers::pop_back(src_tuple);
static constexpr auto size_next = thelpers::pop_front(src_tuple);
if constexpr (!keep_recurse)
return src;
if constexpr (keep_recurse)
{
//rows and columns are swapped on purpose (see the book's derivation): rows index the next layer, columns the current one
auto tmp = std::make_tuple(WeightsMatrixT<std::get<index>(size_next), std::get<index>(size_frst)>());
return make_weights_recur<index+1>(std::tuple_cat(std::move(src), std::move(tmp)));
}
}
//builds all weight matrices
static auto make_weights() noexcept
{
return make_weights_recur<0>(std::tuple{});
}
//fills a single weight matrix with random values drawn from a Gaussian
//distribution whose stddev is 1 / sqrt(incoming_connections)
template <size_t R, size_t C>
static void fill_matrix_random(WeightsMatrixT<R, C>& src) noexcept
{
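//64-bit Mersenne Twister on 64-bit platforms, the 32-bit variant otherwise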
using engine_t = std::conditional_t<(sizeof(void*) > 7), std::mt19937_64, std::mt19937>;
//seed the pseudo-random engine with real entropy
engine_t pseudo_rnd(std::random_device{}());
//making distributor
const Float sdev = std::pow(cast(src.rows()), cast(-0.5f));
std::normal_distribution<Float> dis(cast(0.), sdev);
const auto rnd =[&dis, &pseudo_rnd]()
{
Float v, a;
do
{
v = dis(pseudo_rnd);
a = std::fabs(v);
}
while (a < 0.001 || a > 0.999); //reject magnitudes outside (0.001, 0.999)
return v;
};
for (auto& v : src)
v = rnd();
}
//recursively applies random for each matrix in tuple
template <class T, class ...Ts>
static void fill_random_1by1(T& left, Ts& ...others) noexcept
{
fill_matrix_random(left);
if constexpr (sizeof...(Ts) > 0)
{
fill_random_1by1(others...);
}
}
//----------------------------------------------------------------------------------
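//shared tail of forward()/backward(): ends the recursion (returning either the last
//value alone or a tuple of all values, depending on KeepAll) or recurses into NAME,
//feeding the freshly computed value in as the next layer's input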
#define NO_COPY_PASTE(NAME) if constexpr (szo < 1) \
{ \
if constexpr (!KeepAll) \
return o; \
if constexpr (KeepAll) \
return std::make_tuple(o); \
} \
if constexpr (szo > 0) \
{ \
if constexpr (KeepAll)\
return std::tuple_cat(std::make_tuple(o), NAME<KeepAll>(o, others...));\
if constexpr (!KeepAll)\
return NAME<KeepAll>(o, others...);}
//----------------------------------------------------------------------------------
//if KeepAll == true, returns all layer outputs as a tuple,
//otherwise returns only the last one as a single value
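//each step computes o = activation_function(W.dot(inps)) and feeds o to the next layer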
template <bool KeepAll, class Inps, class T, class ...Ts>
static decltype(auto) forward(const Inps& inps, T& left, Ts& ...others) noexcept
{
constexpr auto szo = sizeof...(others);
const auto o = activation_function(left.dot(inps));
NO_COPY_PASTE(forward);
}
template <bool KeepAll, class Inps, class T, class ...Ts>
static decltype(auto) backward(const Inps& inps, T& left, Ts& ...others) noexcept
{
constexpr auto szo = sizeof...(others);
const auto o = reverse_activation_function(left.dot(inps));
NO_COPY_PASTE(backward);
}
#undef NO_COPY_PASTE
template <class Mat>
static Mat activation_function(const Mat& src) noexcept
{
constexpr static Float one = cast(1.f);
Mat res;
//FIXME: running this transform in parallel makes the thread sanitizer report a data race, cause not yet understood...
std::transform(std::execution::par_unseq, src.begin(), src.end(), res.begin(), [](const Float& x)
{
return one / (one + cast(std::exp(-x)));
});
return res;
}
template <class Mat>
static Mat reverse_activation_function(const Mat& src) noexcept
{
constexpr static Float one = cast(1.f);
Mat res;
std::transform(std::execution::par_unseq, src.begin(), src.end(), res.begin(), [](const Float& y)->Float
{
//logit, the exact inverse of the sigmoid used in activation_function
return std::log(static_cast<Float>(y / (one - y)));
});
return res;
}
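//splits the output error between layers: the error assigned to the previous layer
//is e_prev = W^T.dot(e), starting from the initial error e = targets - actual output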
template <size_t Index, class IniErrs, class ...Tw>
static auto build_errors(IniErrs&& errs, const std::tuple<Tw...>& w)
{
constexpr auto max_size = sizeof...(Tw) - 1;
constexpr bool keep_recurse = Index < max_size;
if constexpr (keep_recurse)
{
auto newerr = std::make_tuple(std::get<Index>(w).transpose().dot(std::get<Index>(errs)));
return build_errors<Index+1>(std::tuple_cat(std::move(errs), std::move(newerr)), w);
}
if constexpr (!keep_recurse)
return errs;
}
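//gradient-descent step per layer: W += learning_rate * (e * o * (1 - o)).dot(o_prev^T),
//where o * (1 - o) is the sigmoid derivative and o_prev is the previous layer's output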
template <size_t Index, class Errors, class Outs, class ...Tw>
static void update_weights(const Float learning_rate, const Errors& err, const Outs& outs, std::tuple<Tw...>& w)
{
if constexpr(Index < sizeof...(Tw))
{
{
//separate scope, so the temporaries are destroyed before the recursive call
const auto& o = std::get<Index>(outs);
const auto no = std::get<Index + 1>(outs).transpose();
const auto m1 = std::get<Index>(err) * o * (cast(1) - o);
std::get<Index>(w) += m1.dot(no) * learning_rate;
}
update_weights<Index + 1>(learning_rate, err, outs, w);
}
}
private:
std::invoke_result_t<decltype(&make_weights)> weights{make_weights()};
public:
SimpleLayeredNN() = default;
~SimpleLayeredNN()= default;
DEFAULT_COPYMOVE(SimpleLayeredNN);
///set all weights randomly
SimpleLayeredNN& random_weights() noexcept
{
std::apply([](auto& a, auto& ... b)
{
fill_random_1by1(a, b...);
}, weights);
return *this;
}
template <bool KeepAllOuts = false>
auto query(const VectorRow<Float, inputs_count>& inputs) const noexcept
{
return std::apply([&](auto& a, auto& ... b)
{
return forward<KeepAllOuts>(inputs, a, b...);
}, weights);
}
template <bool KeepAllOuts = false>
auto reverse_query(const VectorRow<Float, outputs_count>& outputs) const noexcept
{
const auto rweights = thelpers::reverse_tuple_ref(weights);
return std::apply([&outputs](auto& a, auto& ... b)
{
return backward<KeepAllOuts>(outputs, a, b...);
}, rweights);
}
void train(const Float learning_rate, const VectorRow<Float, inputs_count>& inputs, const VectorRow<Float, outputs_count>& targets)
{
//outputs from each layer
const auto outputs = query<true>(inputs);
{
//tuples of references to matrices in reverse order
const auto routputs = std::tuple_cat(thelpers::reverse_tuple_ref(outputs), std::tie(inputs));
auto rweights = thelpers::reverse_tuple_ref(weights);
const auto errors = build_errors<0>(std::make_tuple(targets - std::get<0>(routputs)), rweights);
update_weights<0>(learning_rate, errors, routputs, rweights);
}
}
///shorthand for static_cast<Float> (the class' floating point template parameter)
template<class Any>
static constexpr Float cast(const Any v) noexcept
{
return static_cast<Float>(v);
}
};
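///Usage sketch (a minimal example; the layer sizes, iteration count and learning rate
///are illustrative, and VectorRow is assumed to be default-constructible and fillable,
///per matrix2d.h):
/// SimpleLayeredNN<float, 3, 5, 2> nn;
/// nn.random_weights();
/// VectorRow<float, 3> in; //fill with normalized inputs
/// VectorRow<float, 2> target; //fill with the expected outputs
/// for (int i = 0; i < 1000; ++i)
///     nn.train(0.1f, in, target);
/// const auto out = nn.query(in); //final activations of the output layer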