-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathclean_data.js
60 lines (47 loc) · 1.97 KB
/
clean_data.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
var lineReader = require('readline').createInterface({
input: require('fs').createReadStream('english_removed.txt')
});
lineReader.on('line', function (line) {
if (line.includes('?')){line = ''}
line = line.replace(/> </g,'><');
line = line.replace(/><.*>/g, '>')
line = line.replace(/</g,'');
line = line.replace(/>/g,'');
line = line.replace(/\[.*?\]/g, '')
if (line.toLowerCase().substring(0,4) == 'what'){line = ''}
if (line.toLowerCase().substring(0,6) == 'how to'){line = ''}
if (line.toLowerCase().substring(line.length-3,line.length) == '...'){line = ''}
if (line.toLowerCase().includes('grammar')){ line = ''}
if (line.toLowerCase().includes('pronunciation')){line = ''}
if (line.toLowerCase().includes('punctuation')){line = ''}
if (line.toLowerCase().includes('punctuate')){line = ''}
if (line.toLowerCase().includes('English')){line = ''}
if (line.toLowerCase().includes('sentence')){line = ''}
if (line.toLowerCase().includes('verb')){line = ''}
if (line.toLowerCase().includes('adjective')){line = ''}
if (line.toLowerCase().includes('noun')){line = ''}
if (line.toLowerCase().includes('question')){line = ''}
if (line.toLowerCase().includes(' or ')){line = ''}
if (line.toLowerCase().includes('gerund')){line = ''}
if (line.toLowerCase().includes('tense')){line = ''}
if (line.toLowerCase().includes(':')){line = ''}
if (line.toLowerCase().includes(' vs ')){line = ''}
if (line.toLowerCase().includes(' v ')){line = ''}
if (line.toLowerCase().includes('perfect')){line = ''}
line = line.replace(' / ','/');
line = line.replace(' /','/');
line = line.replace('/ ','/');
line = line.replace(/(/g,'');
line = line.replace(/)/g,'');
line = line.replace(/"/g,'');
if (line.indexOf('/') > 0){
line = line.substring(0, line.indexOf('/'));}
if (line.indexOf('\\') > 0){
line = line.substring(0, line.indexOf('/'));}
if (line.indexOf('vs') > 0){
line = line.substring(0, line.indexOf('vs'));}
line = line.replace(/ /g,' ');
///if (line.toLowerCase().includes('church')){
console.log(line)
//}
});