-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathhealth.js
167 lines (143 loc) · 5.68 KB
/
health.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
const express = require('express');
const axios = require('axios');
const dotenv = require('dotenv');
const app = express();
const prom_app = express();
const Prometheus = require('prom-client');
dotenv.config();
/**
 * Builds the monitoring configuration for one chain from environment variables.
 *
 * Per-chain settings are read from `<chainPrefix><SETTING>`; POLL_INTERVAL,
 * INDEXER_SERVICE_URL, SUBGRAPH_HOST and BEARER_TOKEN are read without a prefix
 * and are therefore shared by every chain.
 *
 * @param {string} chainPrefix - Environment variable prefix, e.g. "CHAIN1_".
 * @returns {object} Configuration with:
 *   N (number)              - block delta above which a check counts as an error
 *   M (number)              - response time in ms above which a check counts as an error
 *   Y (number)              - number of most recent samples kept for the error rate
 *   errorThreshold (number) - error rate in percent above which the chain is unhealthy
 *   pollInterval (number)   - delay in ms between two checks
 *   plus the endpoint/credential strings used by the health check.
 */
const getConfig = (chainPrefix) => {
  const env = process.env;
  // Always parse base-10 so values such as "010" or "0x10" are not misread.
  const toInt = (raw) => Number.parseInt(raw, 10);

  const lookback = toInt(env[`${chainPrefix}ERROR_RATE_LOOKBACK`]);
  const errorThreshold = Number.parseFloat(env[`${chainPrefix}ERROR_THRESHOLD`]);

  return {
    N: toInt(env[`${chainPrefix}BLOCKS_BEHIND_THRESHOLD`]) || 100,
    M: toInt(env[`${chainPrefix}RESPONSE_TIME_THRESHOLD`]) || 500,
    // A window smaller than one sample would leave nothing to average, so
    // missing, non-numeric and non-positive values fall back to the default.
    Y: lookback >= 1 ? lookback : 10,
    // 0 is a meaningful threshold ("any error is unhealthy"), so only a
    // missing or non-numeric value falls back to the default.
    errorThreshold: Number.isNaN(errorThreshold) ? 10 : errorThreshold,
    pollInterval: toInt(env.POLL_INTERVAL) || 10000,
    indexerServiceUrl: env.INDEXER_SERVICE_URL || "https://api.thegraph.com/",
    subgraphDeployment: env[`${chainPrefix}SUBGRAPH_DEPL`] || "QmV614UpBCpuusv5MsismmPYu4KqLtdeNMKpiNrX56kw6u",
    subgraphHost: env.SUBGRAPH_HOST || "api.thegraph.com",
    rpcURL: env[`${chainPrefix}RPC_URL`] || "https://mainnet.infura.io/v3/your-infura-project-id",
    bearerToken: env.BEARER_TOKEN,
    name: env[`${chainPrefix}NAME`] || "unknown",
  };
};
// Most recent health verdict per chain, keyed by the chain's configured name.
let healthStatuses = {};

// Prometheus metrics
// A dedicated registry holds the gauges exposed on the /metrics endpoint.
const register = new Prometheus.Registry();

// Every gauge carries a single `chain` label so that all monitored chains
// share one metric name.
const makeChainGauge = (name, help) => new Prometheus.Gauge({
  name,
  help,
  labelNames: ['chain'],
});

const deltaMetric = makeChainGauge(
  'block_number_delta',
  'Block number delta between Infura and The Graph',
);
const responseTimeMetric = makeChainGauge(
  'response_time',
  'Response time in milliseconds',
);
const errorRateMetric = makeChainGauge(
  'error_rate',
  'Average error rate for the past Y queries',
);

for (const gauge of [deltaMetric, responseTimeMetric, errorRateMetric]) {
  register.registerMetric(gauge);
}
/**
 * Computes the arithmetic mean of the recorded error samples.
 *
 * @param {number[]} errorRates - Recorded samples (the health check stores 1
 *   for a failed check and 0 for a passing one).
 * @returns {number} The mean of the samples, or 0 when there are no samples
 *   (instead of the NaN that 0 / 0 would produce).
 */
const calculateAverageErrorRate = (errorRates) => {
  if (errorRates.length === 0) {
    return 0;
  }
  const total = errorRates.reduce((sum, sample) => sum + sample, 0);
  return total / errorRates.length;
};
/**
 * Runs one health check for a chain and returns the resulting verdict.
 *
 * The check queries the subgraph for its latest indexed block, asks the RPC
 * endpoint for the chain head via `eth_blockNumber`, and records a sample in
 * `errorRates`: 1 when the block delta exceeds `config.N`, the subgraph
 * response time exceeds `config.M`, or any step throws; 0 otherwise. The
 * window is trimmed to the `config.Y` most recent samples. The delta, response
 * time and error-rate gauges are updated for the chain (delta and response
 * time are set to -1 when the check throws).
 *
 * @param {object} config - Chain configuration as produced by getConfig.
 * @param {number[]} errorRates - Sliding window of samples; mutated in place.
 * @returns {Promise<string>} "unhealthy" when the average error rate in
 *   percent exceeds `config.errorThreshold`, otherwise "healthy".
 */
const checkHealth = async (config, errorRates) => {
  // Declared locally: assigning an undeclared name would create a global
  // shared by every chain, and throws a ReferenceError in strict mode.
  let healthStatus;

  // Appends one sample and drops the oldest once the window exceeds Y.
  const recordSample = (sample) => {
    errorRates.push(sample);
    if (errorRates.length > config.Y) {
      errorRates.shift();
    }
  };

  try {
    console.log(`[${config.name}] Checking health status...`);
    // Strip trailing slashes so a base URL such as "https://host/" does not
    // yield "https://host//subgraphs/...".
    const baseUrl = String(config.indexerServiceUrl).replace(/\/+$/, '');
    const startTime = Date.now();
    const subgraphResponse = await axios.post(`${baseUrl}/subgraphs/id/${config.subgraphDeployment}`, {
      query: '{_meta{block{number}}}'
    }, {
      headers: {
        Authorization: `Bearer ${config.bearerToken}`,
        Host: config.subgraphHost
      }
    });
    // Only the subgraph request is timed; the RPC call below is not included.
    const responseTime = Date.now() - startTime;
    // The GraphQL result is delivered as a JSON string in `graphQLResponse`.
    const subgraphBlockNumber = JSON.parse(subgraphResponse.data.graphQLResponse).data._meta.block.number;
    const { data: infuraBlockNumberResponse } = await axios.post(config.rpcURL, {
      jsonrpc: "2.0",
      id: 1,
      method: "eth_blockNumber",
      params: []
    });
    // eth_blockNumber returns a hex-encoded quantity.
    const infuraBlockNumber = Number.parseInt(infuraBlockNumberResponse.result, 16);
    const delta = Math.abs(Number.parseInt(subgraphBlockNumber, 10) - infuraBlockNumber);
    console.log(`[${config.name}] Subgraph block number: ${subgraphBlockNumber}, Infura block number: ${infuraBlockNumber}, Delta: ${delta}, Response time: ${responseTime}ms`);
    const isFailing = delta > config.N || responseTime > config.M;
    recordSample(isFailing ? 1 : 0);
    deltaMetric.labels(config.name).set(delta);
    responseTimeMetric.labels(config.name).set(responseTime);
  } catch (error) {
    console.error(`[${config.name}]`, error);
    recordSample(1);
    // -1 marks "no measurement available" on both gauges.
    deltaMetric.labels(config.name).set(-1);
    responseTimeMetric.labels(config.name).set(-1);
  } finally {
    const averageErrorRate = calculateAverageErrorRate(errorRates);
    console.log(`[${config.name}] Average error rate for the past ${config.Y} queries: ${averageErrorRate}`);
    // The average is a fraction in [0, 1]; the threshold is a percentage.
    healthStatus = averageErrorRate * 100 > config.errorThreshold ? "unhealthy" : "healthy";
    errorRateMetric.labels(config.name).set(averageErrorRate * 100);
  }
  return healthStatus;
};
// GET /health: responds 200 "healthy" when no chain is currently marked
// unhealthy, otherwise 503 with the names of the unhealthy chains.
app.get('/health', (req, res) => {
  console.log(`Received /health request, health statuses:`, healthStatuses);
  const failingChains = Object.keys(healthStatuses).filter(
    (chain) => healthStatuses[chain] === 'unhealthy',
  );
  if (failingChains.length > 0) {
    res.status(503).send(`unhealthy: ${failingChains.join(', ')}`);
    return;
  }
  res.status(200).send("healthy");
});
// Builds a listen callback that logs the given message once the server is up.
const announceListening = (message) => () => {
  console.log(message);
};

// The health endpoint and the Prometheus endpoint are served on separate ports.
app.listen(3000, announceListening('Health status server listening on port 3000!'));
prom_app.listen(9090, announceListening('Prometheus metrics server listening on port 9090!'));
// GET /metrics: serves the registry's metrics in the registry's content type.
// If collecting the metrics fails, the error is logged and a 500 is returned
// rather than leaving the request open with an unhandled rejection.
prom_app.get('/metrics', async (req, res) => {
  try {
    const body = await register.metrics();
    res.set('Content-Type', register.contentType);
    res.send(body);
  } catch (error) {
    console.error('Failed to collect Prometheus metrics:', error);
    res.status(500).send('failed to collect metrics');
  }
});
/**
 * Starts an endless polling loop for one chain.
 *
 * Each iteration runs a health check, stores the verdict in `healthStatuses`
 * under the chain's configured name, and schedules the next iteration after
 * `config.pollInterval` milliseconds. The samples array is private to this
 * chain's loop.
 *
 * @param {string} chainPrefix - Environment variable prefix of the chain, e.g. "CHAIN1_".
 * @returns {void}
 */
const startMonitoring = (chainPrefix) => {
  const config = getConfig(chainPrefix);
  const errorRates = [];
  const healthCheck = async () => {
    try {
      healthStatuses[config.name] = await checkHealth(config, errorRates);
    } catch (error) {
      // An unexpected failure means health could not be verified, so the
      // chain is reported as unhealthy until a later check succeeds.
      console.error(`[${config.name}] Health check failed unexpectedly:`, error);
      healthStatuses[config.name] = "unhealthy";
    } finally {
      // Always reschedule; otherwise a single failure would stop monitoring
      // this chain for the lifetime of the process.
      setTimeout(healthCheck, config.pollInterval);
    }
  };
  // Fire and forget: healthCheck handles its own errors and never rejects.
  healthCheck();
};
/**
 * Lists the environment prefixes of all configured chains.
 *
 * Chains are numbered from 1; enumeration stops at the first index whose
 * `CHAIN<i>_NAME` variable is missing or empty, so any chain defined after a
 * gap in the numbering is not returned.
 *
 * @returns {string[]} Prefixes such as "CHAIN1_", "CHAIN2_", in ascending order.
 */
const getChains = () => {
  const prefixes = [];
  for (let index = 1; process.env[`CHAIN${index}_NAME`]; index += 1) {
    prefixes.push(`CHAIN${index}_`);
  }
  return prefixes;
};
// Collect the prefix of every configured chain and start one independent
// monitoring loop per chain.
const chains = getChains();
for (const chainPrefix of chains) {
  startMonitoring(chainPrefix);
}