forked from m0llyc00k/Thesis-2022
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathjenks.js
152 lines (125 loc) · 5.76 KB
/
jenks.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
// # [Jenks natural breaks optimization](http://en.wikipedia.org/wiki/Jenks_natural_breaks_optimization)
//
// Implementations: [1](http://danieljlewis.org/files/2010/06/Jenks.pdf) (python),
// [2](https://github.com/vvoovv/djeo-jenks/blob/master/main.js) (buggy),
// [3](https://github.com/simogeo/geostats/blob/master/lib/geostats.js#L407) (works)
function jenks(data, n_classes) {
// Compute the matrices required for Jenks breaks. These matrices
// can be used for any classing of data with `classes <= n_classes`
function getMatrices(data, n_classes) {
// in the original implementation, these matrices are referred to
// as `LC` and `OP`
//
// * lower_class_limits (LC): optimal lower class limits
// * variance_combinations (OP): optimal variance combinations for all classes
var lower_class_limits = [],
variance_combinations = [],
// loop counters
i, j,
// the variance, as computed at each step in the calculation
variance = 0;
// Initialize and fill each matrix with zeroes
for (i = 0; i < data.length + 1; i++) {
var tmp1 = [], tmp2 = [];
for (j = 0; j < n_classes + 1; j++) {
tmp1.push(0);
tmp2.push(0);
}
lower_class_limits.push(tmp1);
variance_combinations.push(tmp2);
}
for (i = 1; i < n_classes + 1; i++) {
lower_class_limits[1][i] = 1;
variance_combinations[1][i] = 0;
// in the original implementation, 9999999 is used but
// since Javascript has `Infinity`, we use that.
for (j = 2; j < data.length + 1; j++) {
variance_combinations[j][i] = Infinity;
}
}
for (var l = 2; l < data.length + 1; l++) {
// `SZ` originally. this is the sum of the values seen thus
// far when calculating variance.
var sum = 0,
// `ZSQ` originally. the sum of squares of values seen
// thus far
sum_squares = 0,
// `WT` originally. This is the number of
w = 0,
// `IV` originally
i4 = 0;
// in several instances, you could say `Math.pow(x, 2)`
// instead of `x * x`, but this is slower in some browsers
// introduces an unnecessary concept.
for (var m = 1; m < l + 1; m++) {
// `III` originally
var lower_class_limit = l - m + 1,
val = data[lower_class_limit - 1];
// here we're estimating variance for each potential classing
// of the data, for each potential number of classes. `w`
// is the number of data points considered so far.
w++;
// increase the current sum and sum-of-squares
sum += val;
sum_squares += val * val;
// the variance at this point in the sequence is the difference
// between the sum of squares and the total x 2, over the number
// of samples.
variance = sum_squares - (sum * sum) / w;
i4 = lower_class_limit - 1;
if (i4 !== 0) {
for (j = 2; j < n_classes + 1; j++) {
// if adding this element to an existing class
// will increase its variance beyond the limit, break
// the class at this point, setting the lower_class_limit
// at this point.
if (variance_combinations[l][j] >=
(variance + variance_combinations[i4][j - 1])) {
lower_class_limits[l][j] = lower_class_limit;
variance_combinations[l][j] = variance +
variance_combinations[i4][j - 1];
}
}
}
}
lower_class_limits[l][1] = 1;
variance_combinations[l][1] = variance;
}
// return the two matrices. for just providing breaks, only
// `lower_class_limits` is needed, but variances can be useful to
// evaluage goodness of fit.
return {
lower_class_limits: lower_class_limits,
variance_combinations: variance_combinations
};
}
// the second part of the jenks recipe: take the calculated matrices
// and derive an array of n breaks.
function breaks(data, lower_class_limits, n_classes) {
var k = data.length - 1,
kclass = [],
countNum = n_classes;
// the calculation of classes will never include the upper and
// lower bounds, so we need to explicitly set them
kclass[n_classes] = data[data.length - 1];
kclass[0] = data[0];
// the lower_class_limits matrix is used as indexes into itself
// here: the `k` variable is reused in each iteration.
while (countNum > 1) {
kclass[countNum - 1] = data[lower_class_limits[k][countNum] - 2];
k = lower_class_limits[k][countNum] - 1;
countNum--;
}
return kclass;
}
if (n_classes > data.length) return null;
// sort data in numerical order, since this is expected
// by the matrices function
data = data.slice().sort(function (a, b) { return a - b; });
// get our basic matrices
var matrices = getMatrices(data, n_classes),
// we only need lower class limits here
lower_class_limits = matrices.lower_class_limits;
// extract n_classes out of the computed matrices
return breaks(data, lower_class_limits, n_classes);
}