-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathtextrazor.js
executable file
·151 lines (126 loc) · 3.91 KB
/
textrazor.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
#!/usr/bin/env node
const configed = require('dotenv').config();
// output colors
// https://www.npmjs.com/package/colors
const colors = require('colors/safe');
// file system
const fs = require('fs');
// node.js command-line interfaces made easy
// https://www.npmjs.com/package/commander
const program = require('commander');
const QueueProcessor = require('@jasonbelmonti/queue-processor');
const TextRazor = require('@jasonbelmonti/textrazor');
const NUM_PROCESSORS = 2;
/**
 * Write JSON data to a file as UTF-8, replacing the file if it already exists.
 *
 * @param {*} jsonData - Either already-serialized JSON (a string, Buffer or
 *   other typed array, written unchanged) or any JSON-serializable value,
 *   which is serialized with 2-space indentation before being written.
 * @param {string} path - Destination file path.
 * @throws {Error} Whatever `fs.writeFileSync` throws (missing directory,
 *   permissions, ...), or a TypeError when `jsonData` cannot be serialized.
 */
function writeJSONToFile(jsonData, path) {
  // fs.writeFileSync only accepts strings and binary views; handing it a plain
  // object (e.g. a parsed API response) would throw instead of saving anything.
  const isAlreadySerialized = typeof jsonData === 'string' || ArrayBuffer.isView(jsonData);
  const contents = isAlreadySerialized ? jsonData : JSON.stringify(jsonData, null, 2);
  fs.writeFileSync(path, contents, 'utf8');
}
/**
 * Command-line front end for TextRazor analysis.
 *
 * Registers an `analyze` sub-command on a commander program. The command
 * analyzes either a piece of text or one or more urls and, when `--write` is
 * given, saves each result as `<path>/<encoded text-or-url>.json`.
 */
class TextRazorCLI {
  /**
   * @param {object} program - commander program the commands are registered on.
   */
  constructor(program) {
    // https://www.textrazor.com/docs/rest#analysis
    this._registerAnalyze(program);
  }

  /**
   * Register the `analyze` command, its options and its action handler.
   *
   * @param {object} program - commander program.
   */
  _registerAnalyze(program) {
    program
      .command('analyze')
      .option('-t, --text [text]', 'Content to analyze')
      .option(
        '-u, --urls [urls]',
        'A list of comma-separated urls to extract and analyze',
        // Drop blank entries so `a,,b` or a trailing comma never queues an empty url.
        val => val.split(',').map(url => url.trim()).filter(url => url.length > 0)
      )
      .option('-e, --extractors <extractors>', 'A list of comma-separated extractors')
      .option('-w, --write [path]', 'Save the result to [path]')
      .action((options) => {
        const {
          text,
          urls,
          extractors,
          write: path
        } = options;

        // An option given without a value is not a string/array (the test
        // double and commander's optional-value flags can yield `true`), so
        // check the type instead of mere truthiness.
        const hasText = typeof text === 'string' && text.length > 0;
        const hasUrls = Array.isArray(urls) && urls.length > 0;

        if (hasText) {
          // text wins when both text and urls are supplied
          this._analyzeText(text, extractors, path);
        } else if (hasUrls) {
          if (urls.length === 1) {
            this._analyzeUrl(urls[0], extractors, path);
          } else {
            this._analyzeUrlList(urls, extractors, path);
          }
        } else {
          // Covers missing options as well as empty ones (`-t ""`, bare `-u`),
          // which previously fell through without any feedback.
          console.error(colors.red('Missing required parameters! Supply either content or urls'));
          process.exit(1);
        }
      });
  }

  /**
   * Run one TextRazor analysis, logging any failure in red.
   *
   * @param {object} analysisOptions - options handed to `TextRazor.analyze`
   *   (this class passes `{ extractors, text }` or `{ extractors, url }`).
   * @returns {Promise<*>} resolves with whatever `TextRazor.analyze` yields;
   *   rejects with the original error after it has been logged.
   */
  _analyze(analysisOptions) {
    // Calling analyze inside the executor turns a synchronous throw into a
    // rejection, so both failure modes go through the same logging path.
    return new Promise(resolve => resolve(TextRazor.analyze(analysisOptions)))
      .catch((e) => {
        console.log(colors.red(e));
        throw e;
      });
  }

  /**
   * Analyze a single input and optionally save the result.
   *
   * The returned promise never rejects: failures are logged and reported
   * through `process.exitCode` instead of surfacing as unhandled rejections.
   *
   * @param {object} analysisOptions - options for `_analyze`.
   * @param {string} [path] - output directory; nothing is saved when falsy.
   * @param {string} urlOrText - the analyzed input, used to name the file.
   * @returns {Promise<void>}
   */
  _analyzeAndSave(analysisOptions, path, urlOrText) {
    return this._analyze(analysisOptions)
      .then(
        (result) => {
          if (path) {
            this._writeToJSON(result, path, urlOrText);
          }
        },
        () => {
          // _analyze already printed the error; just flag the failed run.
          process.exitCode = 1;
        }
      )
      .catch((e) => {
        // Only errors from saving the result reach this handler.
        console.log(colors.red(e));
        process.exitCode = 1;
      });
  }

  /**
   * Analyze raw text and optionally save the result under `path`.
   *
   * @param {string} text - content to analyze.
   * @param {string} [extractors] - extractors option, forwarded unchanged.
   * @param {string} [path] - output directory.
   * @returns {Promise<void>} settles once the analysis (and save) finished.
   */
  _analyzeText(text, extractors, path) {
    return this._analyzeAndSave({ extractors, text }, path, text);
  }

  /**
   * Analyze a single url and optionally save the result under `path`.
   *
   * @param {string} url - url to extract and analyze.
   * @param {string} [extractors] - extractors option, forwarded unchanged.
   * @param {string} [path] - output directory.
   * @returns {Promise<void>} settles once the analysis (and save) finished.
   */
  _analyzeUrl(url, extractors, path) {
    return this._analyzeAndSave({ extractors, url }, path, url);
  }

  /**
   * Analyze several urls using NUM_PROCESSORS QueueProcessor instances.
   *
   * NOTE(review): every processor receives the same `urls` array; presumably
   * QueueProcessor consumes it as a shared work queue — confirm against the
   * @jasonbelmonti/queue-processor implementation.
   *
   * @param {string[]} urls - urls to analyze.
   * @param {string} [extractors] - extractors option, forwarded unchanged.
   * @param {string} [path] - output directory, passed along as `{ path }`.
   */
  _analyzeUrlList(urls, extractors, path) {
    for (let i = 0; i < NUM_PROCESSORS; i++) {
      const processor = new QueueProcessor(urls, `textrazor_${i}`, { path });
      processor.process(
        (url) => {
          return this._analyze({ extractors, url });
        },
        this._onQueueSuccess.bind(this),
        this._onQueueError.bind(this),
        this._onQueueComplete.bind(this)
      );
    }
  }

  /**
   * Save one analysis result as `<path>/<encodeURIComponent(urlOrText)>.json`.
   *
   * @param {*} result - analysis result, handed to `writeJSONToFile`.
   * @param {string} [path] - output directory; the call is a no-op when falsy.
   * @param {string} urlOrText - analyzed input, encoded into the file name.
   */
  _writeToJSON(result, path, urlOrText) {
    if (!path) {
      return;
    }
    // make the directory if it doesn't exist; `recursive` also creates missing
    // parent directories so nested output paths such as `out/run1` work
    if (!fs.existsSync(path)) {
      fs.mkdirSync(path, { recursive: true });
    }
    writeJSONToFile(result, `${path}/${encodeURIComponent(urlOrText)}.json`);
  }

  /**
   * Queue callback for a successfully analyzed url.
   *
   * @param {*} result - analysis result for `url`.
   * @param {string} url - the url that was analyzed.
   * @param {{path: (string|undefined)}} context - presumably the options
   *   object given to the QueueProcessor constructor (verify against caller).
   */
  _onQueueSuccess(result, url, { path }) {
    console.log(colors.green(`✅ ${url}`));
    if (path) {
      this._writeToJSON(result, path, url);
      console.log(colors.green(`saved analysis to ${path}`));
    }
  }

  /**
   * Queue callback for a failed url: print the error in red.
   *
   * @param {*} error - the failure reported by the queue.
   */
  _onQueueError(error) {
    console.log(colors.red.bold('⚠︎ fail ⚠︎'));
    console.log(colors.red(error));
  }

  /**
   * Queue callback once a processor is done: print its success/error totals.
   *
   * @param {object} queue - reads `queue.name`, `queue.counts.success` and
   *   `queue.counts.error`.
   */
  _onQueueComplete(queue) {
    console.log(colors.green(`${queue.name}`));
    console.log(colors.gray(`-------------`));
    // success total
    console.log(colors.green.bold(`✅ ${queue.counts.success}`));
    // error total (highlighted only when something actually failed)
    const color = queue.counts.error > 0 ? 'red' : 'gray';
    console.log(colors[color](`❌ ${queue.counts.error}`));
  }
}
// configuration: expose the package version through the program's version flag
const packageInfo = require('./package.json');
program.version(packageInfo.version);

// the commands have to be registered before the command line is parsed
const textrazor = new TextRazorCLI(program);
program.parse(process.argv);

// export an instantiated singleton
module.exports = textrazor;