restructure health-check architecture
This commit is contained in:
parent
64cd7b135c
commit
307d736a69
|
@ -1,8 +0,0 @@
|
||||||
const db = require("../db");
|
|
||||||
|
|
||||||
// returns all health check entries
|
|
||||||
module.exports = (req, res) => {
|
|
||||||
const entries = db.get("entries").orderBy("date", "desc").value();
|
|
||||||
|
|
||||||
res.send(entries);
|
|
||||||
};
|
|
|
@ -0,0 +1,8 @@
|
||||||
|
const db = require("../db");
|
||||||
|
|
||||||
|
// returns all critical health check entries
|
||||||
|
module.exports = (req, res) => {
|
||||||
|
const entries = db.get("critical").orderBy("date", "desc").value();
|
||||||
|
|
||||||
|
res.send(entries);
|
||||||
|
};
|
|
@ -2,61 +2,75 @@ const { StatusCodes } = require("http-status-codes");
|
||||||
const { sum, sumBy } = require("lodash");
|
const { sum, sumBy } = require("lodash");
|
||||||
const db = require("../db");
|
const db = require("../db");
|
||||||
|
|
||||||
// getStatus returns the server's current health check status
|
/**
|
||||||
function getStatus() {
|
* Get status code that should be returned in the API response.
|
||||||
const disabled = db.get("disabled").value();
|
* - OK (200) in case everything is healthy
|
||||||
|
* - SERVICE_UNAVAILABLE (503) in case of any failures or if disabled
|
||||||
|
*/
|
||||||
|
function getStatusCode() {
|
||||||
|
// check whether the portal has been manually disabled
|
||||||
|
const disabled = getDisabled();
|
||||||
|
|
||||||
if (disabled) {
|
if (disabled) {
|
||||||
return StatusCodes.SERVICE_UNAVAILABLE;
|
return StatusCodes.SERVICE_UNAVAILABLE;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Grab entry element from DB
|
// grab one most recent critical entry element from DB
|
||||||
const entry = db.get("entries").orderBy("date", "desc").head().value();
|
const entry = getCurrentCriticalEntry();
|
||||||
|
|
||||||
// Check that every critical check entry is up
|
// find out whether every check in the entry is up
|
||||||
if (entry && entry.checks.every(({ up, critical }) => up && critical)) {
|
if (entry && entry.checks.every(({ up }) => up)) {
|
||||||
return StatusCodes.OK;
|
return StatusCodes.OK;
|
||||||
}
|
}
|
||||||
|
|
||||||
// At least one check failed
|
// in case at least one check failed
|
||||||
return StatusCodes.SERVICE_UNAVAILABLE;
|
return StatusCodes.SERVICE_UNAVAILABLE;
|
||||||
}
|
}
|
||||||
|
|
||||||
// getTimeout returns the average time out from a sample of 10 health check
|
/**
|
||||||
// entries.
|
* Get the sample of most recent critical entries and
|
||||||
function getTimeout() {
|
* calculate the average response time of all of them
|
||||||
if (getStatus() === StatusCodes.SERVICE_UNAVAILABLE) {
|
*/
|
||||||
return 0;
|
function getAvarageResponseTime() {
|
||||||
}
|
// get most recent 10 successful checks for the calculation
|
||||||
|
|
||||||
// Grab 10 entries from the database as a sample to determine the average
|
|
||||||
// timeout for the server.
|
|
||||||
const sample = db
|
const sample = db
|
||||||
.get("entries")
|
.get("critical")
|
||||||
.orderBy("date", "desc")
|
.orderBy("date", "desc")
|
||||||
.filter(({ checks }) => checks.every(({ up, critical }) => up && critical))
|
.filter(({ checks }) => checks.every(({ up }) => up))
|
||||||
.take(10)
|
.take(10)
|
||||||
.value();
|
.value();
|
||||||
|
|
||||||
// Return average timeout
|
// calculate average time of response
|
||||||
return Math.round(sum(sample.map(({ checks }) => sumBy(checks, "time"))) / sample.size);
|
return Math.round(sum(sample.map(({ checks }) => sumBy(checks, "time"))) / sample.size);
|
||||||
}
|
}
|
||||||
|
|
||||||
// getEntriesSinceYesterday gets the health check entries since yesterday
|
/**
|
||||||
function getEntriesSinceYesterday() {
|
* Get the single most recent critical entry
|
||||||
const yesterday = new Date();
|
*/
|
||||||
|
function getCurrentCriticalEntry() {
|
||||||
|
return db.get("critical").orderBy("date", "desc").head().value();
|
||||||
|
}
|
||||||
|
|
||||||
yesterday.setDate(yesterday.getDate() - 1);
|
/**
|
||||||
|
* Get the disabled flag state (manual portal disable)
|
||||||
return db
|
*/
|
||||||
.get("entries")
|
function getDisabled() {
|
||||||
.orderBy("date", "desc")
|
return db.get("disabled").value();
|
||||||
.filter(({ date }) => date >= yesterday.toISOString())
|
|
||||||
.value();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
module.exports = (req, res) => {
|
module.exports = (req, res) => {
|
||||||
|
const statusCode = getStatusCode();
|
||||||
|
const timeout = statusCode === StatusCodes.OK ? getAvarageResponseTime() : 0;
|
||||||
|
|
||||||
|
// We want to delay the response for the load balancer to be able to prioritize
|
||||||
|
// servers based on the successful response time of this endpoint. Load balancer
|
||||||
|
// will pull the server if the response is an error so there is no point in delaying
|
||||||
|
// failures, hence 0 timeout on those.
|
||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
res.status(getStatus()).send(getEntriesSinceYesterday());
|
// include some health information in the response body
|
||||||
}, getTimeout());
|
const entry = getCurrentCriticalEntry();
|
||||||
|
const disabled = getDisabled();
|
||||||
|
|
||||||
|
res.status(statusCode).send({ disabled, entry });
|
||||||
|
}, timeout);
|
||||||
};
|
};
|
||||||
|
|
|
@ -1,16 +0,0 @@
|
||||||
const db = require("../db");
|
|
||||||
|
|
||||||
// returns all health check entries that are not older than one day
|
|
||||||
module.exports = (req, res) => {
|
|
||||||
const yesterday = new Date();
|
|
||||||
|
|
||||||
yesterday.setDate(yesterday.getDate() - 1);
|
|
||||||
|
|
||||||
const entries = db
|
|
||||||
.get("entries")
|
|
||||||
.orderBy("date", "desc")
|
|
||||||
.filter(({ date }) => date >= yesterday.toISOString())
|
|
||||||
.value();
|
|
||||||
|
|
||||||
res.send(entries);
|
|
||||||
};
|
|
|
@ -0,0 +1,8 @@
|
||||||
|
const db = require("../db");
|
||||||
|
|
||||||
|
// returns all verbose health check entries
|
||||||
|
module.exports = (req, res) => {
|
||||||
|
const entries = db.get("verbose").orderBy("date", "desc").value();
|
||||||
|
|
||||||
|
res.send(entries);
|
||||||
|
};
|
|
@ -5,9 +5,9 @@ const Memory = require("lowdb/adapters/Memory");
|
||||||
|
|
||||||
if (!fs.existsSync("state")) fs.mkdirSync("state");
|
if (!fs.existsSync("state")) fs.mkdirSync("state");
|
||||||
|
|
||||||
const adapter = process.env.NODE_ENV === "production" ? new FileSync("state/state.json") : new Memory();
|
const adapter = new FileSync("state/state.json");
|
||||||
const db = low(adapter);
|
const db = low(adapter);
|
||||||
|
|
||||||
db.defaults({ disabled: false, entries: [] }).write();
|
db.defaults({ disabled: false, critical: [], verbose: [] }).write();
|
||||||
|
|
||||||
module.exports = db;
|
module.exports = db;
|
||||||
|
|
|
@ -18,8 +18,8 @@ server.use(bodyparser.urlencoded({ extended: false }));
|
||||||
server.use(bodyparser.json());
|
server.use(bodyparser.json());
|
||||||
|
|
||||||
server.get("/health-check", require("./api/index"));
|
server.get("/health-check", require("./api/index"));
|
||||||
server.get("/health-check/recent", require("./api/recent"));
|
server.get("/health-check/critical", require("./api/critical"));
|
||||||
server.get("/health-check/all", require("./api/all"));
|
server.get("/health-check/verbose", require("./api/verbose"));
|
||||||
server.get("/health-check/disabled", require("./api/disabled"));
|
server.get("/health-check/disabled", require("./api/disabled"));
|
||||||
|
|
||||||
server.listen(port, host, (error) => {
|
server.listen(port, host, (error) => {
|
||||||
|
|
|
@ -4,12 +4,12 @@ const { criticalChecks } = require("./checks/critical");
|
||||||
const { verboseChecks } = require("./checks/verbose");
|
const { verboseChecks } = require("./checks/verbose");
|
||||||
|
|
||||||
// execute the critical health-check script every 5 minutes
|
// execute the critical health-check script every 5 minutes
|
||||||
const basicJob = schedule.scheduleJob("*/5 * * * *", async () => {
|
const criticalJob = schedule.scheduleJob("*/5 * * * *", async () => {
|
||||||
const entry = { date: new Date().toISOString(), checks: [] };
|
const entry = { date: new Date().toISOString(), checks: [] };
|
||||||
|
|
||||||
entry.checks = await Promise.all(criticalChecks.map((check) => new Promise(check)));
|
entry.checks = await Promise.all(criticalChecks.map((check) => new Promise(check)));
|
||||||
|
|
||||||
db.get("entries").push(entry).write();
|
db.get("critical").push(entry).write();
|
||||||
});
|
});
|
||||||
|
|
||||||
// execute the verbose health-check script once per hour
|
// execute the verbose health-check script once per hour
|
||||||
|
@ -18,11 +18,11 @@ const verboseJob = schedule.scheduleJob("0 * * * *", async () => {
|
||||||
|
|
||||||
entry.checks = await Promise.all(verboseChecks.map((check) => new Promise(check)));
|
entry.checks = await Promise.all(verboseChecks.map((check) => new Promise(check)));
|
||||||
|
|
||||||
db.get("entries").push(entry).write();
|
db.get("verbose").push(entry).write();
|
||||||
});
|
});
|
||||||
|
|
||||||
// Launch Health check jobs
|
// Launch Health check jobs
|
||||||
setTimeout(() => {
|
setTimeout(() => {
|
||||||
basicJob.invoke();
|
criticalJob.invoke();
|
||||||
verboseJob.invoke();
|
verboseJob.invoke();
|
||||||
}, 60 * 1000); // delay for 60s to give other services time to start up
|
}, 60 * 0); // delay for 60s to give other services time to start up
|
||||||
|
|
Reference in New Issue