restructure health-check architecture

This commit is contained in:
Karol Wypchlo 2020-09-10 12:15:54 +02:00
parent 64cd7b135c
commit 307d736a69
8 changed files with 71 additions and 65 deletions

View File

@ -1,8 +0,0 @@
const db = require("../db");
// returns all health check entries
module.exports = (req, res) => {
const entries = db.get("entries").orderBy("date", "desc").value();
res.send(entries);
};

View File

@ -0,0 +1,8 @@
const db = require("../db");
// returns all critical health check entries
module.exports = (req, res) => {
const entries = db.get("critical").orderBy("date", "desc").value();
res.send(entries);
};

View File

@ -2,61 +2,75 @@ const { StatusCodes } = require("http-status-codes");
const { sum, sumBy } = require("lodash"); const { sum, sumBy } = require("lodash");
const db = require("../db"); const db = require("../db");
// getStatus returns the server's current health check status /**
function getStatus() { * Get status code that should be returned in the API response.
const disabled = db.get("disabled").value(); * - OK (200) in case everything is healthy
* - SERVICE_UNAVAILABLE (503) in case of any failures or if disabled
*/
function getStatusCode() {
// check whether the portal has been manually disabled
const disabled = getDisabled();
if (disabled) { if (disabled) {
return StatusCodes.SERVICE_UNAVAILABLE; return StatusCodes.SERVICE_UNAVAILABLE;
} }
// Grab entry element from DB // grab one most recent critical entry element from DB
const entry = db.get("entries").orderBy("date", "desc").head().value(); const entry = getCurrentCriticalEntry();
// Check that every critical check entry is up // find out whether every check in the entry is up
if (entry && entry.checks.every(({ up, critical }) => up && critical)) { if (entry && entry.checks.every(({ up }) => up)) {
return StatusCodes.OK; return StatusCodes.OK;
} }
// At least one check failed // in case at least one check failed
return StatusCodes.SERVICE_UNAVAILABLE; return StatusCodes.SERVICE_UNAVAILABLE;
} }
// getTimeout returns the average time out from a sample of 10 health check /**
// entries. * Get the sample of most recent critical entries and
function getTimeout() { * calculate the average response time of all of them
if (getStatus() === StatusCodes.SERVICE_UNAVAILABLE) { */
return 0; function getAvarageResponseTime() {
} // get most recent 10 successful checks for the calculation
// Grab 10 entries from the database as a sample to determine the average
// timeout for the server.
const sample = db const sample = db
.get("entries") .get("critical")
.orderBy("date", "desc") .orderBy("date", "desc")
.filter(({ checks }) => checks.every(({ up, critical }) => up && critical)) .filter(({ checks }) => checks.every(({ up }) => up))
.take(10) .take(10)
.value(); .value();
// Return average timeout // calculate average time of response
return Math.round(sum(sample.map(({ checks }) => sumBy(checks, "time"))) / sample.size); return Math.round(sum(sample.map(({ checks }) => sumBy(checks, "time"))) / sample.size);
} }
// getEntriesSinceYesterday gets the health check entries since yesterday /**
function getEntriesSinceYesterday() { * Get one, most current critical entry
const yesterday = new Date(); */
function getCurrentCriticalEntry() {
return db.get("critical").orderBy("date", "desc").head().value();
}
yesterday.setDate(yesterday.getDate() - 1); /**
* Get the disabled flag state (manual portal disable)
return db */
.get("entries") function getDisabled() {
.orderBy("date", "desc") return db.get("disabled").value();
.filter(({ date }) => date >= yesterday.toISOString())
.value();
} }
module.exports = (req, res) => { module.exports = (req, res) => {
const statusCode = getStatusCode();
const timeout = statusCode === StatusCodes.OK ? getAvarageResponseTime() : 0;
// We want to delay the response for the load balancer to be able to prioritize
// servers based on the successful response time of this endpoint. Load balancer
// will pull the server if the response is an error so there is no point in delaying
// failures, hence 0 timeout on those.
setTimeout(() => { setTimeout(() => {
res.status(getStatus()).send(getEntriesSinceYesterday()); // include some health information in the response body
}, getTimeout()); const entry = getCurrentCriticalEntry();
const disabled = getDisabled();
res.status(statusCode).send({ disabled, entry });
}, timeout);
}; };

View File

@ -1,16 +0,0 @@
const db = require("../db");
// returns all health check entries that are not older than one day
module.exports = (req, res) => {
const yesterday = new Date();
yesterday.setDate(yesterday.getDate() - 1);
const entries = db
.get("entries")
.orderBy("date", "desc")
.filter(({ date }) => date >= yesterday.toISOString())
.value();
res.send(entries);
};

View File

@ -0,0 +1,8 @@
const db = require("../db");
// returns all verbose health check entries
module.exports = (req, res) => {
const entries = db.get("verbose").orderBy("date", "desc").value();
res.send(entries);
};

View File

@ -5,9 +5,9 @@ const Memory = require("lowdb/adapters/Memory");
if (!fs.existsSync("state")) fs.mkdirSync("state"); if (!fs.existsSync("state")) fs.mkdirSync("state");
const adapter = process.env.NODE_ENV === "production" ? new FileSync("state/state.json") : new Memory(); const adapter = new FileSync("state/state.json");
const db = low(adapter); const db = low(adapter);
db.defaults({ disabled: false, entries: [] }).write(); db.defaults({ disabled: false, critical: [], verbose: [] }).write();
module.exports = db; module.exports = db;

View File

@ -18,8 +18,8 @@ server.use(bodyparser.urlencoded({ extended: false }));
server.use(bodyparser.json()); server.use(bodyparser.json());
server.get("/health-check", require("./api/index")); server.get("/health-check", require("./api/index"));
server.get("/health-check/recent", require("./api/recent")); server.get("/health-check/critical", require("./api/critical"));
server.get("/health-check/all", require("./api/all")); server.get("/health-check/verbose", require("./api/verbose"));
server.get("/health-check/disabled", require("./api/disabled")); server.get("/health-check/disabled", require("./api/disabled"));
server.listen(port, host, (error) => { server.listen(port, host, (error) => {

View File

@ -4,12 +4,12 @@ const { criticalChecks } = require("./checks/critical");
const { verboseChecks } = require("./checks/verbose"); const { verboseChecks } = require("./checks/verbose");
// execute the critical health-check script every 5 minutes // execute the critical health-check script every 5 minutes
const basicJob = schedule.scheduleJob("*/5 * * * *", async () => { const criticalJob = schedule.scheduleJob("*/5 * * * *", async () => {
const entry = { date: new Date().toISOString(), checks: [] }; const entry = { date: new Date().toISOString(), checks: [] };
entry.checks = await Promise.all(criticalChecks.map((check) => new Promise(check))); entry.checks = await Promise.all(criticalChecks.map((check) => new Promise(check)));
db.get("entries").push(entry).write(); db.get("critical").push(entry).write();
}); });
// execute the verbose health-check script once per hour // execute the verbose health-check script once per hour
@ -18,11 +18,11 @@ const verboseJob = schedule.scheduleJob("0 * * * *", async () => {
entry.checks = await Promise.all(verboseChecks.map((check) => new Promise(check))); entry.checks = await Promise.all(verboseChecks.map((check) => new Promise(check)));
db.get("entries").push(entry).write(); db.get("verbose").push(entry).write();
}); });
// Launch Health check jobs // Launch Health check jobs
setTimeout(() => { setTimeout(() => {
basicJob.invoke(); criticalJob.invoke();
verboseJob.invoke(); verboseJob.invoke();
}, 60 * 1000); // delay for 60s to give other services time to start up }, 60 * 0); // delay for 60s to give other services time to start up