-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathgsheetCrawler.js
149 lines (132 loc) · 4.79 KB
/
gsheetCrawler.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
/**
* TODO:
* 1. Get the new access token at the following website [1]
* - Authorize all the APIs in `Google Sheets API v4`
* 2. Modify the `START_ROW` to the row you want to start crawling
* - Check the last row had been crawled in [2]
*
* [1] Get the access token:
* https://developers.google.com/oauthplayground/
* [2] Post google sheet:
* https://docs.google.com/spreadsheets/d/1QJvhTWVKJlqvuBHcxNVjyCUfoFMn4ll91bOB4H3gjNc/edit#gid=1992180234
*/
// ******* Modify here ********
// LastRow=709 Update at 2020.03.30
var START_ROW = 710; // The row in google sheet where you want to start crawling.
// ****************************
var mysql = require('mysql');
var { google } = require('googleapis')
var client = require('./config.crawler.json')
var RANGE = `A${START_ROW}:W`
const FOUR_SPACE = "\xa0\xa0\xa0\xa0";
const GSHEET = {
"rowId": 0,
"courseName": 2,
"teacher": 3,
"catalog": 7,
"semester": 4,
"courseMaterial": 14,
"courseStyle": 15,
"rollStyle": 16,
"testStyle": 17,
"reportStyle": 18,
"homework": 19,
"comment": 20,
"devote": 21,
"gain": 22
}
var connection = mysql.createConnection({
host: client.DB.host,
user: client.DB.user,
password: client.DB.password,
database: client.DB.db
});
connection.connect();
function authorize(callback) {
const { client_secret, client_id, redirect_uris } = client.installed;
const oAuth2Client = new google.auth.OAuth2(
client_id, client_secret, redirect_uris[0]
);
oAuth2Client.setCredentials({
access_token: client.credential.access,
refresh_token: client.credential.refresh
}
);
callback(oAuth2Client);
}
authorize(function (auth) {
var sheets = google.sheets('v4');
sheets.spreadsheets.values.get({
spreadsheetId: client.google_sheet_id_origin,
range: RANGE,
auth: auth
}, (err, response) => {
if (err) {
console.log('The API returned an error: ' + err);
return;
}
totalData = response.data.values;
for (let row in totalData) {
var rowData = {}
for (let col in totalData[row]) {
totalData[row][col] = totalData[row][col].replace(/\"|\'|\#|\/\*/g, "")
}
rowData['course_name'] = totalData[row][GSHEET.courseName]
rowData['teacher'] = totalData[row][GSHEET.teacher]
rowData['catalog'] = totalData[row][GSHEET.catalog]
rowData['semester'] = totalData[row][GSHEET.semester]
rowData['comment'] = ''
rowData['comment'] += '[上課教材]\n\n' + FOUR_SPACE
rowData['comment'] += totalData[row][GSHEET.courseMaterial]
rowData['comment'] += '\n\n[教學方法]\n\n' + FOUR_SPACE
rowData['comment'] += totalData[row][GSHEET.courseStyle]
rowData['comment'] += '\n\n[點名方式]\n\n' + FOUR_SPACE
rowData['comment'] += totalData[row][GSHEET.rollStyle]
rowData['comment'] += '\n\n[考試方式]\n\n' + FOUR_SPACE
rowData['comment'] += totalData[row][GSHEET.testStyle]
rowData['comment'] += '\n\n[報告方式]\n\n' + FOUR_SPACE
rowData['comment'] += totalData[row][GSHEET.reportStyle]
rowData['comment'] += '\n\n[作業方式]\n\n' + FOUR_SPACE
rowData['comment'] += totalData[row][GSHEET.homework]
rowData['comment'] += '\n\n[心得]\n\n' + FOUR_SPACE
rowData['comment'] += totalData[row][GSHEET.comment]
rowData['comment'] += '\n\n[付出]\n\n' + FOUR_SPACE
rowData['comment'] += totalData[row][GSHEET.devote]
rowData['comment'] += '\n\n[收穫]\n\n' + FOUR_SPACE
rowData['comment'] += totalData[row][GSHEET.gain] + "\n"
rowData['row_gsheet'] = START_ROW
rowData['crawl_id'] = regCrawlID(totalData[row][GSHEET.rowId])
connection.query(makeInsertSQL(rowData), function (err, res, fields) {
if (err)
console.log(err)
})
console.log("Insert row: " + rowData['row_gsheet'])
START_ROW++;
}
console.log("... Finish ...")
});
})
function regCrawlID(id) {
let reg = new RegExp('[0-9]+');
if (reg.test(id)) {
id = reg.exec(id)[0];
} else {
id = -1; //“無效”“???” in 發佈column
}
return id;
}
function makeInsertSQL(data) {
let name = ''
let value = ''
for (let d in data) {
name += d
name += ','
value += '"'
value += data[d]
value += '",'
}
name = name.substring(0, name.length - 1)
value = value.substring(0, value.length - 1)
let sql = `INSERT INTO post (${name}) VALUES (${value})`
return sql;
}