restructure health-check architecture
This commit is contained in:
parent
64cd7b135c
commit
307d736a69
|
@ -1,8 +0,0 @@
|
|||
const db = require("../db");
|
||||
|
||||
// returns all health check entries
|
||||
module.exports = (req, res) => {
|
||||
const entries = db.get("entries").orderBy("date", "desc").value();
|
||||
|
||||
res.send(entries);
|
||||
};
|
|
@ -0,0 +1,8 @@
|
|||
const db = require("../db");
|
||||
|
||||
// returns all critical health check entries
|
||||
module.exports = (req, res) => {
|
||||
const entries = db.get("critical").orderBy("date", "desc").value();
|
||||
|
||||
res.send(entries);
|
||||
};
|
|
@ -2,61 +2,75 @@ const { StatusCodes } = require("http-status-codes");
|
|||
const { sum, sumBy } = require("lodash");
|
||||
const db = require("../db");
|
||||
|
||||
// getStatus returns the server's current health check status
|
||||
function getStatus() {
|
||||
const disabled = db.get("disabled").value();
|
||||
/**
 * Resolve the status code that the API response should carry.
 * - SERVICE_UNAVAILABLE (503) when the portal has been manually disabled
 * - OK (200) when a most recent critical entry exists and all of its checks are up
 * - SERVICE_UNAVAILABLE (503) in every other case
 */
function getStatusCode() {
  // a manually disabled portal is always reported as unavailable
  if (getDisabled()) {
    return StatusCodes.SERVICE_UNAVAILABLE;
  }

  // only the single most recent critical entry decides the outcome
  const latest = getCurrentCriticalEntry();
  const allChecksUp = Boolean(latest) && latest.checks.every((check) => check.up);

  // a missing entry or at least one failed check both count as unavailable
  return allChecksUp ? StatusCodes.OK : StatusCodes.SERVICE_UNAVAILABLE;
}
|
||||
|
||||
// getTimeout returns the average time out from a sample of 10 health check
|
||||
// entries.
|
||||
function getTimeout() {
|
||||
if (getStatus() === StatusCodes.SERVICE_UNAVAILABLE) {
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Grab 10 entries from the database as a sample to determine the average
|
||||
// timeout for the server.
|
||||
/**
 * Calculate the average response time over a sample of the most recent
 * successful critical entries.
 *
 * The response time of a single entry is the sum of the `time` values of all
 * of its checks; the result is the mean of those sums over the sample, rounded
 * to the nearest integer.
 *
 * @returns {number} the rounded average, or 0 when no entry qualifies for the sample
 */
function getAvarageResponseTime() {
  // take the 10 most recent entries in which every single check was up
  const sample = db
    .get("critical")
    .orderBy("date", "desc")
    .filter(({ checks }) => checks.every(({ up }) => up))
    .take(10)
    .value();

  // `.value()` unwraps the chain into a plain array, which exposes `length`
  // and has no `size` property; dividing by `sample.size` always yielded NaN.
  // An empty sample is guarded as well so the result is always a finite number.
  if (sample.length === 0) {
    return 0;
  }

  // mean of the per-entry totals
  return Math.round(sum(sample.map(({ checks }) => sumBy(checks, "time"))) / sample.length);
}
|
||||
|
||||
// getEntriesSinceYesterday gets the health check entries since yesterday
|
||||
function getEntriesSinceYesterday() {
|
||||
const yesterday = new Date();
|
||||
/**
 * Fetch the newest critical entry, i.e. the first one when the
 * critical entries are sorted by date in descending order.
 */
function getCurrentCriticalEntry() {
  const newestFirst = db.get("critical").orderBy("date", "desc");

  return newestFirst.head().value();
}
|
||||
|
||||
yesterday.setDate(yesterday.getDate() - 1);
|
||||
|
||||
return db
|
||||
.get("entries")
|
||||
.orderBy("date", "desc")
|
||||
.filter(({ date }) => date >= yesterday.toISOString())
|
||||
.value();
|
||||
/**
 * Read the current value of the "disabled" flag (manual portal disable)
 * from the database.
 */
function getDisabled() {
  const disabledFlag = db.get("disabled");

  return disabledFlag.value();
}
|
||||
|
||||
module.exports = (req, res) => {
|
||||
const statusCode = getStatusCode();
|
||||
const timeout = statusCode === StatusCodes.OK ? getAvarageResponseTime() : 0;
|
||||
|
||||
// We want to delay the response for the load balancer to be able to prioritize
|
||||
// servers based on the successful response time of thid endpoint. Load balancer
|
||||
// will pull the server if the response is an error so there is no point in delaying
|
||||
// failures, hence 0 timeout on those.
|
||||
setTimeout(() => {
|
||||
res.status(getStatus()).send(getEntriesSinceYesterday());
|
||||
}, getTimeout());
|
||||
// include some health information in the response body
|
||||
const entry = getCurrentCriticalEntry();
|
||||
const disabled = getDisabled();
|
||||
|
||||
res.status(statusCode).send({ disabled, entry });
|
||||
}, timeout);
|
||||
};
|
||||
|
|
|
@ -1,16 +0,0 @@
|
|||
const db = require("../db");
|
||||
|
||||
// returns all health check entries that are not older than one day
|
||||
module.exports = (req, res) => {
|
||||
const yesterday = new Date();
|
||||
|
||||
yesterday.setDate(yesterday.getDate() - 1);
|
||||
|
||||
const entries = db
|
||||
.get("entries")
|
||||
.orderBy("date", "desc")
|
||||
.filter(({ date }) => date >= yesterday.toISOString())
|
||||
.value();
|
||||
|
||||
res.send(entries);
|
||||
};
|
|
@ -0,0 +1,8 @@
|
|||
const db = require("../db");
|
||||
|
||||
// returns all verbose health check entries
|
||||
module.exports = (req, res) => {
|
||||
const entries = db.get("verbose").orderBy("date", "desc").value();
|
||||
|
||||
res.send(entries);
|
||||
};
|
|
@ -5,9 +5,9 @@ const Memory = require("lowdb/adapters/Memory");
|
|||
|
||||
// make sure the "state" directory exists; the file adapter below stores its data in state/state.json
if (!fs.existsSync("state")) fs.mkdirSync("state");
|
||||
|
||||
// this variant picks the file adapter only when NODE_ENV is "production" and an in-memory adapter otherwise
const adapter = process.env.NODE_ENV === "production" ? new FileSync("state/state.json") : new Memory();
|
||||
// this variant always uses the file adapter, regardless of NODE_ENV
const adapter = new FileSync("state/state.json");
|
||||
const db = low(adapter);
|
||||
|
||||
// defaults with a single "entries" collection
db.defaults({ disabled: false, entries: [] }).write();
|
||||
// defaults with separate "critical" and "verbose" collections
db.defaults({ disabled: false, critical: [], verbose: [] }).write();
|
||||
|
||||
// the db instance is the module's export
module.exports = db;
|
||||
|
|
|
@ -18,8 +18,8 @@ server.use(bodyparser.urlencoded({ extended: false }));
|
|||
server.use(bodyparser.json());
|
||||
|
||||
// GET routes of the health-check API; every handler is loaded from its own module under ./api
server.get("/health-check", require("./api/index"));
|
||||
server.get("/health-check/recent", require("./api/recent"));
|
||||
server.get("/health-check/all", require("./api/all"));
|
||||
server.get("/health-check/critical", require("./api/critical"));
|
||||
server.get("/health-check/verbose", require("./api/verbose"));
|
||||
server.get("/health-check/disabled", require("./api/disabled"));
|
||||
|
||||
server.listen(port, host, (error) => {
|
||||
|
|
|
@ -4,12 +4,12 @@ const { criticalChecks } = require("./checks/critical");
|
|||
const { verboseChecks } = require("./checks/verbose");
|
||||
|
||||
// execute the critical health-check script every 5 minutes
|
||||
const basicJob = schedule.scheduleJob("*/5 * * * *", async () => {
|
||||
const criticalJob = schedule.scheduleJob("*/5 * * * *", async () => {
|
||||
// an entry records the ISO timestamp taken at the start of the run and the results of all checks
const entry = { date: new Date().toISOString(), checks: [] };
|
||||
|
||||
// every check is handed to `new Promise` as its executor, so all checks are started
// together and awaited as a group; if any of them rejects, Promise.all rejects and
// the push below is never reached
entry.checks = await Promise.all(criticalChecks.map((check) => new Promise(check)));
|
||||
|
||||
// persist the entry in the "entries" collection
db.get("entries").push(entry).write();
|
||||
// persist the entry in the "critical" collection
db.get("critical").push(entry).write();
|
||||
});
|
||||
|
||||
// execute the verbose health-check script once per hour
|
||||
|
@ -18,11 +18,11 @@ const verboseJob = schedule.scheduleJob("0 * * * *", async () => {
|
|||
|
||||
entry.checks = await Promise.all(verboseChecks.map((check) => new Promise(check)));
|
||||
|
||||
db.get("entries").push(entry).write();
|
||||
db.get("verbose").push(entry).write();
|
||||
});
|
||||
|
||||
// Launch Health check jobs: invoke them once right away instead of waiting for their first scheduled run
|
||||
setTimeout(() => {
|
||||
basicJob.invoke();
|
||||
criticalJob.invoke();
|
||||
verboseJob.invoke();
|
||||
}, 60 * 1000); // delay for 60s to give other services time to start up
|
||||
}, 60 * 0); // NOTE(review): 60 * 0 is 0 ms, so the jobs are invoked without the 60s startup delay - confirm this is intended
|
||||
|
|
Reference in New Issue