-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathDigitImage.java
155 lines (126 loc) · 3.58 KB
/
DigitImage.java
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
/*
* Hand Writing Recognition and Simple CAPTCHA Neural Network
* CS 3425 Final project
* Spring 2014
* Min "Ivy" Xing, Zackery Leman
*
* This class stores a single image and its corresponding label for the training and testing data.
* It can also convert grayscale image data to a binary (0/1) representation.
*/
import java.util.*;
import java.io.IOException;
import java.lang.Math;
import java.util.ArrayList;
import java.util.List;
/**
 * Stores a single image (as a flat pixel array) together with its label, for
 * use as training or testing data.
 *
 * <p>Pixels are held as {@code double} values. When constructed from bytes
 * with {@code binary == true}, the image is thresholded to 0/1 using Otsu's
 * method. The label can additionally be expanded into a one-hot "solution
 * vector" via {@link #vectorizeTrainingData()} or
 * {@link #vectorizeTrainingDataAlphaNum()}.
 */
public class DigitImage {
    // Number of distinct gray levels in an unsigned 8-bit pixel.
    private static final int GRAY_LEVELS = 256;
    // Length of the one-hot vector for digit labels (0...9).
    private static final int DIGIT_CLASSES = 10;
    // Length of the one-hot vector for alphanumeric labels (0...z).
    private static final int ALPHANUMERIC_CLASSES = 36;

    // Is the number or letter that is in the image
    private final int label;
    // This vector represents the number or letter that is in the image in a vector form.
    // It stays empty until one of the vectorize methods is called.
    private final ArrayList<Integer> solutionVector = new ArrayList<Integer>(DIGIT_CLASSES);
    // Pixel values; 0 or 1 once the image has been binarized.
    private final double[] data;

    /**
     * Constructor for MNIST data.
     *
     * @param label  the number shown in the image
     * @param data   raw pixel bytes; each is reinterpreted as unsigned (0...255)
     * @param binary if {@code true}, the pixels are thresholded to 0/1 with
     *               Otsu's method; otherwise the grayscale values are kept
     */
    public DigitImage(int label, byte[] data, boolean binary) {
        this.label = label;
        this.data = new double[data.length];
        for (int i = 0; i < this.data.length; i++) {
            this.data[i] = data[i] & 0xFF; // convert to unsigned
        }
        if (binary) {
            otsu();
        }
    }

    /**
     * Constructor for captchas. Pixel values are copied as-is.
     *
     * <p>NOTE(review): {@code binary} is accepted but not used here, so no
     * thresholding is applied to captcha data. Presumably the captcha pixels
     * are already 0/1 - confirm against the caller before relying on it.
     *
     * @param label  the number or letter shown in the image
     * @param data   pixel values, copied into the internal array
     * @param binary currently ignored
     */
    public DigitImage(int label, int[] data, boolean binary) {
        this.label = label;
        this.data = new double[data.length];
        for (int i = 0; i < this.data.length; i++) {
            this.data[i] = data[i];
        }
    }

    /**
     * Uses Otsu's threshold algorithm to convert from grayscale to black and
     * white. Picks the gray level that maximizes the between-class variance
     * {@code wB * wF * (mB - mF)^2} and then rewrites every pixel in place:
     * values less than or equal to the threshold become 0, all others 1.
     *
     * <p>Expects every pixel to be in 0...255 (it is only invoked from the
     * byte[] constructor, which guarantees this).
     */
    private void otsu() {
        int[] histogram = new int[GRAY_LEVELS];
        for (double datum : data) {
            histogram[(int) datum]++;
        }

        // Sum of all pixel intensities, used to derive the foreground mean.
        double sum = 0;
        for (int level = 0; level < histogram.length; level++) {
            sum += (double) level * histogram[level];
        }

        double sumB = 0;        // intensity sum of the background class
        int wB = 0;             // pixel count of the background class
        double maxVariance = 0;
        int threshold = 0;

        for (int level = 0; level < histogram.length; level++) {
            wB += histogram[level];
            if (wB == 0) {
                // No pixels at or below this level yet; nothing to separate.
                continue;
            }
            int wF = data.length - wB; // pixel count of the foreground class
            if (wF == 0) {
                // Every pixel is now background; higher levels cannot split further.
                break;
            }

            sumB += (double) level * histogram[level];
            double mB = sumB / wB;
            double mF = (sum - sumB) / wF;
            double meanDiff = mB - mF;
            // Both class weights are required; omitting wF biases the result
            // toward thresholds that put almost everything in the background.
            double varianceBetween = (double) wB * wF * meanDiff * meanDiff;
            if (varianceBetween > maxVariance) {
                maxVariance = varianceBetween;
                threshold = level;
            }
        }

        for (int i = 0; i < data.length; i++) {
            data[i] = data[i] <= threshold ? 0 : 1;
        }
    }

    /**
     * Return number in image.
     *
     * @return the label passed to the constructor
     */
    public int getLabel() {
        return label;
    }

    /**
     * Return image as a plain array.
     *
     * @return the internal pixel array itself (not a copy)
     */
    public double[] getData() {
        return data;
    }

    /**
     * Return number in image in vector form.
     *
     * @return the internal solution vector; empty until a vectorize method
     *         has been called
     */
    public ArrayList<Integer> getSolutionVector() {
        return solutionVector;
    }

    /**
     * Return image as an ArrayList.
     *
     * @return a newly built list holding the pixel values in order
     */
    public ArrayList<Double> getArrayListData() {
        ArrayList<Double> doubleList = new ArrayList<Double>(data.length);
        for (double pixel : data) {
            doubleList.add(pixel);
        }
        return doubleList;
    }

    /**
     * Creates 10-dimensional unit vector with a 1 in the position given by
     * the label and zeroes elsewhere. This is used to convert a digit (0...9)
     * into a corresponding desired output from the neural network, making the
     * training data easier to work with.
     *
     * <p>Any previous contents of the solution vector are discarded, so the
     * method may be called repeatedly.
     *
     * @throws IndexOutOfBoundsException if the label is outside 0...9
     */
    public void vectorizeTrainingData() {
        fillOneHot(DIGIT_CLASSES);
    }

    /**
     * Creates 36-dimensional unit vector with a 1 in the position given by
     * the label and zeroes elsewhere. This is used to convert a digit (0...z)
     * into a corresponding desired output from the neural network, making the
     * training data easier to work with.
     *
     * <p>Any previous contents of the solution vector are discarded, so the
     * method may be called repeatedly.
     *
     * @throws IndexOutOfBoundsException if the label is outside 0...35
     */
    public void vectorizeTrainingDataAlphaNum() {
        fillOneHot(ALPHANUMERIC_CLASSES);
    }

    // Resets solutionVector to `size` zeroes and sets the entry at `label` to 1.
    private void fillOneHot(int size) {
        // Clear first so repeated calls do not keep appending to the vector.
        solutionVector.clear();
        for (int i = 0; i < size; i++) {
            solutionVector.add(0);
        }
        solutionVector.set(label, 1);
    }
}