restructure health-check architecture

This commit is contained in:
Karol Wypchlo 2020-09-10 12:15:54 +02:00
parent 64cd7b135c
commit 307d736a69
8 changed files with 71 additions and 65 deletions

View File

@ -1,8 +0,0 @@
const db = require("../db");
// returns all health check entries
module.exports = (req, res) => {
const entries = db.get("entries").orderBy("date", "desc").value();
res.send(entries);
};

View File

@ -0,0 +1,8 @@
const db = require("../db");
// returns all critical health check entries
module.exports = (req, res) => {
const entries = db.get("critical").orderBy("date", "desc").value();
res.send(entries);
};

View File

@ -2,61 +2,75 @@ const { StatusCodes } = require("http-status-codes");
const { sum, sumBy } = require("lodash");
const db = require("../db");
// getStatus returns the server's current health check status
function getStatus() {
const disabled = db.get("disabled").value();
/**
* Get status code that should be returned in the API response.
* - OK (200) in case everything is healthy
* - SERVICE_UNAVAILABLE (503) in case of any failures or if disabled
*
* When no entry is available yet, the "every check is up" test below is
* skipped and SERVICE_UNAVAILABLE is returned as well.
*
* @returns {number} StatusCodes.OK or StatusCodes.SERVICE_UNAVAILABLE
*/
function getStatusCode() {
// check whether the portal has been manually disabled
const disabled = getDisabled();
if (disabled) {
return StatusCodes.SERVICE_UNAVAILABLE;
}
// Grab entry element from DB
const entry = db.get("entries").orderBy("date", "desc").head().value();
// grab the single most recent critical entry from the DB
const entry = getCurrentCriticalEntry();
// Check that every critical check entry is up
if (entry && entry.checks.every(({ up, critical }) => up && critical)) {
// find out whether every check in the entry is up
if (entry && entry.checks.every(({ up }) => up)) {
return StatusCodes.OK;
}
// At least one check failed
// at least one check failed, or there is no entry to inspect
return StatusCodes.SERVICE_UNAVAILABLE;
}
// getTimeout returns the average time out from a sample of 10 health check
// entries.
function getTimeout() {
if (getStatus() === StatusCodes.SERVICE_UNAVAILABLE) {
return 0;
}
// Grab 10 entries from the database as a sample to determine the average
// timeout for the server.
/**
* Get the sample of most recent critical entries and
* calculate the average response time of all of them.
*
* The `time` values of all checks inside one entry are added up, the
* per-entry totals are summed, and the result is divided by the number of
* sampled entries and rounded to the nearest integer.
*
* @returns {number} rounded average of the per-entry total check time
*/
function getAvarageResponseTime() {
// get the 10 most recent successful checks for the calculation
const sample = db
.get("entries")
.get("critical")
.orderBy("date", "desc")
.filter(({ checks }) => checks.every(({ up, critical }) => up && critical))
.filter(({ checks }) => checks.every(({ up }) => up))
.take(10)
.value();
// Return average timeout
// calculate average time of response
// NOTE(review): `.value()` appears to return a plain array, which exposes
// `length` rather than `size`; if so the divisor is undefined and the result
// is NaN - confirm and switch to `sample.length` if that is the case.
// NOTE(review): an empty sample would also produce NaN (0 / 0) - confirm
// that callers tolerate this.
return Math.round(sum(sample.map(({ checks }) => sumBy(checks, "time"))) / sample.size);
}
// getEntriesSinceYesterday gets the health check entries since yesterday
function getEntriesSinceYesterday() {
const yesterday = new Date();
/**
 * Fetch the newest entry of the "critical" collection, i.e. the first
 * element once the collection is sorted by date in descending order.
 */
function getCurrentCriticalEntry() {
  const newestFirst = db.get("critical").orderBy("date", "desc");

  return newestFirst.head().value();
}
yesterday.setDate(yesterday.getDate() - 1);
return db
.get("entries")
.orderBy("date", "desc")
.filter(({ date }) => date >= yesterday.toISOString())
.value();
/**
 * Read the stored "disabled" flag (set when the portal is disabled manually).
 */
function getDisabled() {
  const disabledFlag = db.get("disabled");

  return disabledFlag.value();
}
// Health-check request handler: replies with the status code computed by
// getStatusCode() and a body of the shape { disabled, entry }. Only OK
// responses are delayed (by the average response time); failures are sent
// with a 0 timeout.
module.exports = (req, res) => {
const statusCode = getStatusCode();
const timeout = statusCode === StatusCodes.OK ? getAvarageResponseTime() : 0;
// We want to delay the response for the load balancer to be able to prioritize
// servers based on the successful response time of this endpoint. Load balancer
// will pull the server if the response is an error so there is no point in delaying
// failures, hence 0 timeout on those.
setTimeout(() => {
res.status(getStatus()).send(getEntriesSinceYesterday());
}, getTimeout());
// include some health information in the response body
const entry = getCurrentCriticalEntry();
const disabled = getDisabled();
res.status(statusCode).send({ disabled, entry });
}, timeout);
};

View File

@ -1,16 +0,0 @@
const db = require("../db");
// returns all health check entries that are not older than one day
module.exports = (req, res) => {
const yesterday = new Date();
yesterday.setDate(yesterday.getDate() - 1);
const entries = db
.get("entries")
.orderBy("date", "desc")
.filter(({ date }) => date >= yesterday.toISOString())
.value();
res.send(entries);
};

View File

@ -0,0 +1,8 @@
const db = require("../db");
// returns all verbose health check entries
module.exports = (req, res) => {
const entries = db.get("verbose").orderBy("date", "desc").value();
res.send(entries);
};

View File

@ -5,9 +5,9 @@ const Memory = require("lowdb/adapters/Memory");
// create the "state" directory if it is missing, so the file adapter has a place to write to
if (!fs.existsSync("state")) fs.mkdirSync("state");
const adapter = process.env.NODE_ENV === "production" ? new FileSync("state/state.json") : new Memory();
// state is always kept in state/state.json, regardless of NODE_ENV
const adapter = new FileSync("state/state.json");
const db = low(adapter);
db.defaults({ disabled: false, entries: [] }).write();
// seed the initial shape: the disabled flag plus one collection per check type
db.defaults({ disabled: false, critical: [], verbose: [] }).write();
// the configured instance is what this module exports
module.exports = db;

View File

@ -18,8 +18,8 @@ server.use(bodyparser.urlencoded({ extended: false }));
server.use(bodyparser.json());
// health-check API routes; each handler lives in its own module under ./api
server.get("/health-check", require("./api/index"));
server.get("/health-check/recent", require("./api/recent"));
server.get("/health-check/all", require("./api/all"));
server.get("/health-check/critical", require("./api/critical"));
server.get("/health-check/verbose", require("./api/verbose"));
server.get("/health-check/disabled", require("./api/disabled"));
server.listen(port, host, (error) => {

View File

@ -4,12 +4,12 @@ const { criticalChecks } = require("./checks/critical");
const { verboseChecks } = require("./checks/verbose");
// execute the critical health-check script every 5 minutes
// Each item of criticalChecks is passed to `new Promise(...)` as the executor,
// all of them are awaited together, and the results are stored as the `checks`
// of a single entry stamped with the ISO date at which the job started.
const basicJob = schedule.scheduleJob("*/5 * * * *", async () => {
const criticalJob = schedule.scheduleJob("*/5 * * * *", async () => {
const entry = { date: new Date().toISOString(), checks: [] };
entry.checks = await Promise.all(criticalChecks.map((check) => new Promise(check)));
db.get("entries").push(entry).write();
// append the finished entry to the "critical" collection and persist it
db.get("critical").push(entry).write();
});
// execute the verbose health-check script once per hour
@ -18,11 +18,11 @@ const verboseJob = schedule.scheduleJob("0 * * * *", async () => {
entry.checks = await Promise.all(verboseChecks.map((check) => new Promise(check)));
db.get("entries").push(entry).write();
db.get("verbose").push(entry).write();
});
// Launch Health check jobs
// (invoked once here via a timer, in addition to their cron schedules)
setTimeout(() => {
basicJob.invoke();
criticalJob.invoke();
verboseJob.invoke();
}, 60 * 1000); // delay for 60s to give other services time to start up
}, 60 * 0); // NOTE(review): 60 * 0 is 0 ms, so the jobs are invoked without the 60s start-up delay - confirm this is intended