Merge pull request #389 from NebulousLabs/restructure-health-checks

restructure health-check architecture
This commit is contained in:
Karol Wypchło 2020-09-10 15:17:10 +02:00 committed by GitHub
commit 68450b6995
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 580 additions and 82 deletions

View File

@ -1,8 +0,0 @@
const db = require("../db");
// returns all health check entries
module.exports = (req, res) => {
const entries = db.get("entries").orderBy("date", "desc").value();
res.send(entries);
};

View File

@ -0,0 +1,14 @@
const db = require("../db");
const { getYesterdayISOString } = require("../utils");
// returns all critical health check entries
module.exports = (req, res) => {
const yesterday = getYesterdayISOString();
const entries = db
.get("critical")
.orderBy("date", "desc")
.filter(({ date }) => date > yesterday)
.value();
res.send(entries);
};

View File

@ -2,61 +2,74 @@ const { StatusCodes } = require("http-status-codes");
const { sum, sumBy } = require("lodash");
const db = require("../db");
// getStatus returns the server's current health check status
function getStatus() {
const disabled = db.get("disabled").value();
/**
* Get status code that should be returned in the API response.
* - OK (200) in case everything is healthy
* - SERVICE_UNAVAILABLE (503) in case of any failures or if disabled
*/
function getStatusCode() {
// check whether the portal has been manually disabled
const disabled = getDisabled();
if (disabled) {
return StatusCodes.SERVICE_UNAVAILABLE;
}
// Grab entry element from DB
const entry = db.get("entries").orderBy("date", "desc").head().value();
// grab the most recent critical entry element from DB
const entry = getMostRecentCriticalEntry();
// Check that every critical check entry is up
if (entry && entry.checks.every(({ up, critical }) => up && critical)) {
return StatusCodes.OK;
}
// At least one check failed
// in case there is no entry yet or at least one check failed in the most recent entry
if (!entry || entry.checks.some(({ up }) => !up)) {
return StatusCodes.SERVICE_UNAVAILABLE;
}
// getTimeout returns the average time out from a sample of 10 health check
// entries.
function getTimeout() {
if (getStatus() === StatusCodes.SERVICE_UNAVAILABLE) {
return 0;
return StatusCodes.OK;
}
// Grab 10 entries from the database as a sample to determine the average
// timeout for the server.
/**
 * Calculate the average total response time across a sample of the most
 * recent critical entries in which every check succeeded.
 *
 * For each sampled entry the `time` values of all its checks are summed, and
 * the result is the rounded mean of those per-entry sums.
 *
 * @returns {number} rounded average of the per-entry summed check times, or 0
 *   when there is no fully successful entry to sample
 */
function getAverageResponseTime() {
  // get the 10 most recent entries in which every check succeeded
  const sample = db
    .get("critical")
    .orderBy("date", "desc")
    .filter(({ checks }) => checks.every(({ up }) => up))
    .take(10)
    .value();

  // nothing to average yet; avoid a 0 / 0 division that would yield NaN
  if (sample.length === 0) {
    return 0;
  }

  // value() unwraps the chain into a plain array, so the sample size is
  // `length` (an array has no `size` property, dividing by it gives NaN)
  const totalTimePerEntry = sample.map(({ checks }) => sumBy(checks, "time"));

  return Math.round(sum(totalTimePerEntry) / sample.length);
}
// getEntriesSinceYesterday gets the health check entries since yesterday
function getEntriesSinceYesterday() {
const yesterday = new Date();
/**
 * Fetch the single newest critical entry: the head of the "critical"
 * collection once it is ordered by `date` descending.
 */
function getMostRecentCriticalEntry() {
  const newestFirst = db.get("critical").orderBy("date", "desc");

  return newestFirst.head().value();
}
yesterday.setDate(yesterday.getDate() - 1);
return db
.get("entries")
.orderBy("date", "desc")
.filter(({ date }) => date >= yesterday.toISOString())
.value();
/**
 * Read the current value stored under the "disabled" key of the database
 * (the manual portal disable flag).
 */
function getDisabled() {
  const disabledFlag = db.get("disabled").value();

  return disabledFlag;
}
module.exports = (req, res) => {
const statusCode = getStatusCode();
const timeout = statusCode === StatusCodes.OK ? getAverageResponseTime() : 0;
// We want to delay the response for the load balancer to be able to prioritize
// servers based on the successful response time of this endpoint. Load balancer
// will pull the server if the response is an error so there is no point in delaying
// failures, hence 0 timeout on those.
setTimeout(() => {
res.status(getStatus()).send(getEntriesSinceYesterday());
}, getTimeout());
// include some health information in the response body
const entry = getMostRecentCriticalEntry();
const disabled = getDisabled();
res.status(statusCode).send({ disabled, entry });
}, timeout);
};

View File

@ -1,16 +0,0 @@
const db = require("../db");
// returns all health check entries that are not older than one day
module.exports = (req, res) => {
const yesterday = new Date();
yesterday.setDate(yesterday.getDate() - 1);
const entries = db
.get("entries")
.orderBy("date", "desc")
.filter(({ date }) => date >= yesterday.toISOString())
.value();
res.send(entries);
};

View File

@ -0,0 +1,14 @@
const db = require("../db");
const { getYesterdayISOString } = require("../utils");
// returns all verbose health check entries
module.exports = (req, res) => {
const yesterday = getYesterdayISOString();
const entries = db
.get("verbose")
.orderBy("date", "desc")
.filter(({ date }) => date > yesterday)
.value();
res.send(entries);
};

View File

@ -17,7 +17,6 @@ async function uploadCheck(done) {
up: statusCode === StatusCodes.OK,
statusCode,
time: calculateElapsedTime(time),
critical: true,
});
});
}
@ -41,7 +40,6 @@ async function downloadCheck(done) {
up: statusCode === StatusCodes.OK,
statusCode,
time: calculateElapsedTime(time),
critical: true,
});
}

View File

@ -60,9 +60,477 @@ function dappExampleCheck(done) {
skylink: "EADWpKD0myqH2tZa6xtKebg6kNnwYnI94fl4R8UKgNrmOA",
bodyHash: "d6ad2506590bb45b5acc6a8a964a3da4d657354f",
metadata: {
filename: "/index.html",
length: 4131,
subfiles: { "index.html": { filename: "index.html", contenttype: "text/html", len: 4131 } },
filename: "build",
length: 15578459,
subfiles: {
"451.html": {
filename: "451.html",
contenttype: "text/html",
offset: 40966,
len: 200,
},
"asset-manifest.json": {
filename: "asset-manifest.json",
contenttype: "application/json",
offset: 35832,
len: 5134,
},
"favicon.ico": {
filename: "favicon.ico",
contenttype: "image/vnd.microsoft.icon",
len: 31701,
},
"index.html": {
filename: "index.html",
contenttype: "text/html",
offset: 31701,
len: 4131,
},
"locales/de.json": {
filename: "locales/de.json",
contenttype: "application/json",
offset: 15542609,
len: 4376,
},
"locales/en.json": {
filename: "locales/en.json",
contenttype: "application/json",
offset: 15558827,
len: 4049,
},
"locales/es-AR.json": {
filename: "locales/es-AR.json",
contenttype: "application/json",
offset: 15551984,
len: 3624,
},
"locales/es-US.json": {
filename: "locales/es-US.json",
contenttype: "application/json",
offset: 15574829,
len: 3630,
},
"locales/it-IT.json": {
filename: "locales/it-IT.json",
contenttype: "application/json",
offset: 15538386,
len: 4223,
},
"locales/ro.json": {
filename: "locales/ro.json",
contenttype: "application/json",
offset: 15562876,
len: 3794,
},
"locales/ru.json": {
filename: "locales/ru.json",
contenttype: "application/json",
offset: 15546985,
len: 4999,
},
"locales/vi.json": {
filename: "locales/vi.json",
contenttype: "application/json",
offset: 15569928,
len: 4901,
},
"locales/zh-CN.json": {
filename: "locales/zh-CN.json",
contenttype: "application/json",
offset: 15555608,
len: 3219,
},
"locales/zh-TW.json": {
filename: "locales/zh-TW.json",
contenttype: "application/json",
offset: 15566670,
len: 3258,
},
"manifest.json": {
filename: "manifest.json",
contenttype: "application/json",
offset: 41166,
len: 297,
},
"precache-manifest.cd4677068c6058f8626d6818e2c12fd3.js": {
filename: "precache-manifest.cd4677068c6058f8626d6818e2c12fd3.js",
contenttype: "text/javascript",
offset: 41463,
len: 4721,
},
"service-worker.js": {
filename: "service-worker.js",
contenttype: "text/javascript",
offset: 46184,
len: 1185,
},
"static/css/0.07de6c03.chunk.css": {
filename: "static/css/0.07de6c03.chunk.css",
contenttype: "text/css",
offset: 15537249,
len: 285,
},
"static/css/0.07de6c03.chunk.css.map": {
filename: "static/css/0.07de6c03.chunk.css.map",
contenttype: "application/octet-stream",
offset: 15537818,
len: 568,
},
"static/css/5.d75e0ccb.chunk.css": {
filename: "static/css/5.d75e0ccb.chunk.css",
contenttype: "text/css",
offset: 15537534,
len: 284,
},
"static/css/5.d75e0ccb.chunk.css.map": {
filename: "static/css/5.d75e0ccb.chunk.css.map",
contenttype: "application/octet-stream",
offset: 15536511,
len: 738,
},
"static/js/0.58b0f69f.chunk.js": {
filename: "static/js/0.58b0f69f.chunk.js",
contenttype: "text/javascript",
offset: 7300150,
len: 30029,
},
"static/js/0.58b0f69f.chunk.js.map": {
filename: "static/js/0.58b0f69f.chunk.js.map",
contenttype: "application/octet-stream",
offset: 12111459,
len: 81144,
},
"static/js/1.19c370e0.chunk.js": {
filename: "static/js/1.19c370e0.chunk.js",
contenttype: "text/javascript",
offset: 15495781,
len: 40203,
},
"static/js/1.19c370e0.chunk.js.map": {
filename: "static/js/1.19c370e0.chunk.js.map",
contenttype: "application/octet-stream",
offset: 7330179,
len: 104594,
},
"static/js/10.8ea29dcd.chunk.js": {
filename: "static/js/10.8ea29dcd.chunk.js",
contenttype: "text/javascript",
offset: 15483299,
len: 12345,
},
"static/js/10.8ea29dcd.chunk.js.map": {
filename: "static/js/10.8ea29dcd.chunk.js.map",
contenttype: "application/octet-stream",
offset: 14524416,
len: 30393,
},
"static/js/11.764b8915.chunk.js": {
filename: "static/js/11.764b8915.chunk.js",
contenttype: "text/javascript",
offset: 12208196,
len: 7103,
},
"static/js/11.764b8915.chunk.js.map": {
filename: "static/js/11.764b8915.chunk.js.map",
contenttype: "application/octet-stream",
offset: 12192603,
len: 15593,
},
"static/js/12.88d4fbe5.chunk.js": {
filename: "static/js/12.88d4fbe5.chunk.js",
contenttype: "text/javascript",
offset: 12055261,
len: 16721,
},
"static/js/12.88d4fbe5.chunk.js.map": {
filename: "static/js/12.88d4fbe5.chunk.js.map",
contenttype: "application/octet-stream",
offset: 14460215,
len: 46695,
},
"static/js/13.ea207f69.chunk.js": {
filename: "static/js/13.ea207f69.chunk.js",
contenttype: "text/javascript",
offset: 7168280,
len: 347,
},
"static/js/13.ea207f69.chunk.js.map": {
filename: "static/js/13.ea207f69.chunk.js.map",
contenttype: "application/octet-stream",
offset: 6928538,
len: 563,
},
"static/js/14.d8bc0d4c.chunk.js": {
filename: "static/js/14.d8bc0d4c.chunk.js",
contenttype: "text/javascript",
offset: 12870711,
len: 336,
},
"static/js/14.d8bc0d4c.chunk.js.map": {
filename: "static/js/14.d8bc0d4c.chunk.js.map",
contenttype: "application/octet-stream",
offset: 15535984,
len: 527,
},
"static/js/15.e6215497.chunk.js": {
filename: "static/js/15.e6215497.chunk.js",
contenttype: "text/javascript",
offset: 15495644,
len: 137,
},
"static/js/15.e6215497.chunk.js.map": {
filename: "static/js/15.e6215497.chunk.js.map",
contenttype: "application/octet-stream",
offset: 6928431,
len: 107,
},
"static/js/2.f6da9598.chunk.js": {
filename: "static/js/2.f6da9598.chunk.js",
contenttype: "text/javascript",
offset: 14506910,
len: 17506,
},
"static/js/2.f6da9598.chunk.js.map": {
filename: "static/js/2.f6da9598.chunk.js.map",
contenttype: "application/octet-stream",
offset: 12071982,
len: 39477,
},
"static/js/5.5cc0868a.chunk.js": {
filename: "static/js/5.5cc0868a.chunk.js",
contenttype: "text/javascript",
offset: 10199338,
len: 1842002,
},
"static/js/5.5cc0868a.chunk.js.LICENSE": {
filename: "static/js/5.5cc0868a.chunk.js.LICENSE",
contenttype: "application/octet-stream",
offset: 14554809,
len: 3119,
},
"static/js/5.5cc0868a.chunk.js.map": {
filename: "static/js/5.5cc0868a.chunk.js.map",
contenttype: "application/octet-stream",
offset: 289328,
len: 6632626,
},
"static/js/6.b7681521.chunk.js": {
filename: "static/js/6.b7681521.chunk.js",
contenttype: "text/javascript",
offset: 14237363,
len: 222852,
},
"static/js/6.b7681521.chunk.js.map": {
filename: "static/js/6.b7681521.chunk.js.map",
contenttype: "application/octet-stream",
offset: 12215299,
len: 655412,
},
"static/js/7.0614dbc4.chunk.js": {
filename: "static/js/7.0614dbc4.chunk.js",
contenttype: "text/javascript",
offset: 6921954,
len: 6477,
},
"static/js/7.0614dbc4.chunk.js.map": {
filename: "static/js/7.0614dbc4.chunk.js.map",
contenttype: "application/octet-stream",
offset: 12041340,
len: 13921,
},
"static/js/8.7975098c.chunk.js": {
filename: "static/js/8.7975098c.chunk.js",
contenttype: "text/javascript",
offset: 13796515,
len: 420712,
},
"static/js/8.7975098c.chunk.js.LICENSE": {
filename: "static/js/8.7975098c.chunk.js.LICENSE",
contenttype: "application/octet-stream",
offset: 13796191,
len: 324,
},
"static/js/8.7975098c.chunk.js.map": {
filename: "static/js/8.7975098c.chunk.js.map",
contenttype: "application/octet-stream",
offset: 12871047,
len: 925144,
},
"static/js/9.cc860b76.chunk.js": {
filename: "static/js/9.cc860b76.chunk.js",
contenttype: "text/javascript",
offset: 14557928,
len: 920812,
},
"static/js/9.cc860b76.chunk.js.LICENSE": {
filename: "static/js/9.cc860b76.chunk.js.LICENSE",
contenttype: "application/octet-stream",
offset: 15478740,
len: 4559,
},
"static/js/9.cc860b76.chunk.js.map": {
filename: "static/js/9.cc860b76.chunk.js.map",
contenttype: "application/octet-stream",
offset: 7434773,
len: 2764565,
},
"static/js/main.a7822f79.chunk.js": {
filename: "static/js/main.a7822f79.chunk.js",
contenttype: "text/javascript",
offset: 7168627,
len: 131523,
},
"static/js/main.a7822f79.chunk.js.map": {
filename: "static/js/main.a7822f79.chunk.js.map",
contenttype: "application/octet-stream",
offset: 6929101,
len: 239179,
},
"static/js/runtime-main.68d129c6.js": {
filename: "static/js/runtime-main.68d129c6.js",
contenttype: "text/javascript",
offset: 14217227,
len: 3546,
},
"static/js/runtime-main.68d129c6.js.map": {
filename: "static/js/runtime-main.68d129c6.js.map",
contenttype: "application/octet-stream",
offset: 14220773,
len: 16590,
},
"static/media/arrow-down-blue.cd061363.svg": {
filename: "static/media/arrow-down-blue.cd061363.svg",
contenttype: "image/svg+xml",
offset: 219284,
len: 326,
},
"static/media/arrow-down-grey.c0dedd2f.svg": {
filename: "static/media/arrow-down-grey.c0dedd2f.svg",
contenttype: "image/svg+xml",
offset: 196726,
len: 326,
},
"static/media/arrow-right-white.337ad716.png": {
filename: "static/media/arrow-right-white.337ad716.png",
contenttype: "image/png",
offset: 197052,
len: 12999,
},
"static/media/arrow-right.d285b6cf.svg": {
filename: "static/media/arrow-right.d285b6cf.svg",
contenttype: "image/svg+xml",
offset: 289065,
len: 263,
},
"static/media/circle-grey.ed2a1dad.svg": {
filename: "static/media/circle-grey.ed2a1dad.svg",
contenttype: "image/svg+xml",
offset: 210213,
len: 321,
},
"static/media/circle.2d975615.svg": {
filename: "static/media/circle.2d975615.svg",
contenttype: "image/svg+xml",
offset: 210534,
len: 321,
},
"static/media/coinbaseWalletIcon.62578f59.svg": {
filename: "static/media/coinbaseWalletIcon.62578f59.svg",
contenttype: "image/svg+xml",
offset: 220450,
len: 53626,
},
"static/media/dropdown-blue.b20914ec.svg": {
filename: "static/media/dropdown-blue.b20914ec.svg",
contenttype: "image/svg+xml",
offset: 47369,
len: 164,
},
"static/media/dropdown.7d32d2fa.svg": {
filename: "static/media/dropdown.7d32d2fa.svg",
contenttype: "image/svg+xml",
offset: 287941,
len: 164,
},
"static/media/dropup-blue.b96d70e1.svg": {
filename: "static/media/dropup-blue.b96d70e1.svg",
contenttype: "image/svg+xml",
offset: 210051,
len: 162,
},
"static/media/ethereum-logo.802c6eac.svg": {
filename: "static/media/ethereum-logo.802c6eac.svg",
contenttype: "image/svg+xml",
offset: 219610,
len: 840,
},
"static/media/magnifying-glass.67440097.svg": {
filename: "static/media/magnifying-glass.67440097.svg",
contenttype: "image/svg+xml",
offset: 210855,
len: 8429,
},
"static/media/metamask.023762b6.png": {
filename: "static/media/metamask.023762b6.png",
contenttype: "image/png",
offset: 61600,
len: 114217,
},
"static/media/plus-blue.e8021e51.svg": {
filename: "static/media/plus-blue.e8021e51.svg",
contenttype: "image/svg+xml",
offset: 196237,
len: 190,
},
"static/media/plus-grey.d8e0be7d.svg": {
filename: "static/media/plus-grey.d8e0be7d.svg",
contenttype: "image/svg+xml",
offset: 288875,
len: 190,
},
"static/media/portisIcon.b234b2bf.png": {
filename: "static/media/portisIcon.b234b2bf.png",
contenttype: "image/png",
offset: 274076,
len: 13865,
},
"static/media/question-mark.1ae4d9f4.svg": {
filename: "static/media/question-mark.1ae4d9f4.svg",
contenttype: "image/svg+xml",
offset: 175817,
len: 818,
},
"static/media/question.cc0a2451.svg": {
filename: "static/media/question.cc0a2451.svg",
contenttype: "image/svg+xml",
offset: 288105,
len: 770,
},
"static/media/spinner.be00fc4a.svg": {
filename: "static/media/spinner.be00fc4a.svg",
contenttype: "image/svg+xml",
offset: 47533,
len: 694,
},
"static/media/trustWallet.edcc1ab5.png": {
filename: "static/media/trustWallet.edcc1ab5.png",
contenttype: "image/png",
offset: 176635,
len: 19602,
},
"static/media/walletConnectIcon.8215855c.svg": {
filename: "static/media/walletConnectIcon.8215855c.svg",
contenttype: "image/svg+xml",
offset: 48227,
len: 13373,
},
"static/media/x.5b8e2186.svg": {
filename: "static/media/x.5b8e2186.svg",
contenttype: "image/svg+xml",
offset: 196427,
len: 299,
},
},
},
};
@ -575,8 +1043,8 @@ function skylinkVerification(done, { name, skylink, bodyHash, metadata }) {
}
// Check if the metadata is valid by deep comparing expected value with response
const currentMetadata =
response.header["skynet-file-metadata"] && JSON.parse(response.header["skynet-file-metadata"]);
const metadataHeader = response.header["skynet-file-metadata"];
const currentMetadata = metadataHeader && JSON.parse(metadataHeader);
if (!isEqual(currentMetadata, metadata)) {
entry.up = false;
info.metadata = detailedDiff(currentMetadata, metadata);

View File

@ -5,9 +5,9 @@ const Memory = require("lowdb/adapters/Memory");
if (!fs.existsSync("state")) fs.mkdirSync("state");
const adapter = process.env.NODE_ENV === "production" ? new FileSync("state/state.json") : new Memory();
const adapter = new FileSync("state/state.json");
const db = low(adapter);
db.defaults({ disabled: false, entries: [] }).write();
db.defaults({ disabled: false, critical: [], verbose: [] }).write();
module.exports = db;

View File

@ -18,8 +18,8 @@ server.use(bodyparser.urlencoded({ extended: false }));
server.use(bodyparser.json());
server.get("/health-check", require("./api/index"));
server.get("/health-check/recent", require("./api/recent"));
server.get("/health-check/all", require("./api/all"));
server.get("/health-check/critical", require("./api/critical"));
server.get("/health-check/verbose", require("./api/verbose"));
server.get("/health-check/disabled", require("./api/disabled"));
server.listen(port, host, (error) => {

View File

@ -4,25 +4,27 @@ const { criticalChecks } = require("./checks/critical");
const { verboseChecks } = require("./checks/verbose");
// execute the critical health-check script every 5 minutes
const basicJob = schedule.scheduleJob("*/5 * * * *", async () => {
const entry = { date: new Date().toISOString(), checks: [] };
const criticalJob = schedule.scheduleJob("*/5 * * * *", async () => {
const entry = {
date: new Date().toISOString(),
checks: await Promise.all(criticalChecks.map((check) => new Promise(check))),
};
entry.checks = await Promise.all(criticalChecks.map((check) => new Promise(check)));
db.get("entries").push(entry).write();
db.get("critical").push(entry).write();
});
// execute the verbose health-check script once per hour
const verboseJob = schedule.scheduleJob("0 * * * *", async () => {
const entry = { date: new Date().toISOString(), checks: [] };
const entry = {
date: new Date().toISOString(),
checks: await Promise.all(verboseChecks.map((check) => new Promise(check))),
};
entry.checks = await Promise.all(verboseChecks.map((check) => new Promise(check)));
db.get("entries").push(entry).write();
db.get("verbose").push(entry).write();
});
// Launch Health check jobs
setTimeout(() => {
basicJob.invoke();
criticalJob.invoke();
verboseJob.invoke();
}, 60 * 1000); // delay for 60s to give other services time to start up

View File

@ -1,8 +1,21 @@
// return the time between start and now in milliseconds
/**
 * Compute how many milliseconds have passed since `start`.
 *
 * @param {[number, number]} start - [seconds, nanoseconds] tuple as returned by process.hrtime()
 * @returns {number} elapsed time rounded to the nearest millisecond
 */
function calculateElapsedTime(start) {
  const [seconds, nanoseconds] = process.hrtime(start);
  const elapsedNanoseconds = seconds * 1e9 + nanoseconds;

  return Math.round(elapsedNanoseconds / 1e6); // msec
}
module.exports = { calculateElapsedTime };
/**
 * Get the ISO 8601 string for the moment exactly 24 hours before now.
 *
 * The offset is subtracted from the epoch timestamp rather than from the
 * local calendar day, so the result is always 24 hours back, even when the
 * server's local time zone crosses a daylight saving transition (where
 * stepping the calendar day back would yield 23 or 25 hours).
 *
 * @returns {string} UTC timestamp as produced by Date.prototype.toISOString
 */
function getYesterdayISOString() {
  const DAY_IN_MS = 24 * 60 * 60 * 1000;

  return new Date(Date.now() - DAY_IN_MS).toISOString();
}
module.exports = { calculateElapsedTime, getYesterdayISOString };

View File

@ -55,7 +55,7 @@ async def run_checks():
try:
await check_load_average()
await check_disk()
await check_health()
# await check_health() # FIXME: adjust it to work with https://github.com/NebulousLabs/skynet-webportal/pull/389
except:
trace = traceback.format_exc()
print("[DEBUG] run_checks() failed.")