forked from harthur/glossary
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathkeywords.js
52 lines (44 loc) · 1.42 KB
/
keywords.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
var _ = require('underscore')._,
    fs = require('fs'),
    LanguageDetect = require('languagedetect'),
    lngDetector = new LanguageDetect(),
    glossary = require("./glossary"),
    language = readAllStopwordFilesSync("./stopwords");
/**
 * Extract glossary terms from `text` and drop those that are stopwords of the
 * detected language.
 *
 * Stopword filtering only happens when the detected language is a key of the
 * module-level `language` table; otherwise the terms from glossary.extract are
 * returned unfiltered.
 *
 * @param {string} text - Text to analyse.
 * @param {Object} [options] - Passed through unchanged to glossary.extract.
 * @returns {{terms: Array, language: (string|null)}} The remaining terms and
 *   the first element of the detector's top-ranked entry (presumably the
 *   language name), or null when the detector reported nothing.
 */
function extract(text, options){
  var detected = lngDetector.detect(text, 1);
  // detect() may hand back an empty list (e.g. for text it cannot score);
  // indexing [0][0] unconditionally would then throw a TypeError.
  var lang = (detected && detected.length > 0) ? detected[0][0] : null;
  var terms = glossary.extract(text, options);

  if (lang !== null && _.has(language, lang)) {
    var stopwords = language[lang];
    terms = _.reject(terms, function (term) {
      // Stopword lists are compared against the lower-cased term, so
      // lower-case once per term rather than once per stopword.
      return _.contains(stopwords, term.toLowerCase());
    });
  }

  return {"terms": terms, "language": lang};
}
/**
 * Synchronously load every stopword file found in a directory.
 *
 * The part of each file name before the first "-" becomes the key, and the
 * file's non-empty lines become that key's list of stopwords.
 *
 * @param {string} path - Directory containing the stopword files.
 * @returns {Object} Map from file-name prefix to an array of stopword strings.
 * @throws {Error} Anything raised by fs.readdirSync or fs.readFileSync.
 */
function readAllStopwordFilesSync(path){
  var languages = {};
  // `var` keeps the listing local; a bare assignment would create a global
  // `files` (and is a ReferenceError in strict mode).
  var files = fs.readdirSync(path);

  files.forEach(function (filename){
    var data = fs.readFileSync(path + "/" + filename).toString();
    // Accept both \n and \r\n line endings and skip blank lines, so a
    // trailing "\r" or an empty final entry never ends up in the list.
    languages[filename.split("-")[0]] = data.split(/\r?\n/).filter(function (line){
      return line !== "";
    });
  });

  return languages;
}
/**
 * Asynchronously load every stopword file found in a directory.
 *
 * The part of each file name before the first "-" becomes the key, and the
 * file's non-empty lines become that key's list of stopwords. All files are
 * read concurrently; `callback` fires once, after the last read completes.
 *
 * @param {string} path - Directory containing the stopword files.
 * @param {function(Object)} callback - Receives the map from file-name prefix
 *   to array of stopword strings. It is not passed an error argument.
 * @throws {Error} Read errors are thrown from inside the fs callbacks, so the
 *   caller of this function cannot catch them.
 */
function readAllStopwordFiles(path,callback){
  var languages = {};
  var readCount = 0;

  fs.readdir(path, function(err, files){
    if (err) throw err;

    // With no files the per-file completion check below never runs, so the
    // callback has to be invoked here or it would never fire.
    if (files.length === 0) {
      callback(languages);
      return;
    }

    files.forEach(function (filename){
      fs.readFile(path + "/" + filename, function(err, data){
        if (err) throw err;
        // Accept both \n and \r\n line endings and skip blank lines.
        languages[filename.split("-")[0]] = data.toString().split(/\r?\n/).filter(function (line){
          return line !== "";
        });
        readCount++;
        // Reads finish in any order; the one that completes last reports.
        if (readCount === files.length) {
          callback(languages);
        }
      });
    });
  });
}
// Exports: `extract` is the only function this file attaches to `exports`.
exports.extract = extract;