diff --git a/packages/health-check/src/api/all.js b/packages/health-check/src/api/all.js deleted file mode 100644 index 782e0e52..00000000 --- a/packages/health-check/src/api/all.js +++ /dev/null @@ -1,8 +0,0 @@ -const db = require("../db"); - -// returns all health check entries -module.exports = (req, res) => { - const entries = db.get("entries").orderBy("date", "desc").value(); - - res.send(entries); -}; diff --git a/packages/health-check/src/api/critical.js b/packages/health-check/src/api/critical.js new file mode 100644 index 00000000..6837d17e --- /dev/null +++ b/packages/health-check/src/api/critical.js @@ -0,0 +1,8 @@ +const db = require("../db"); + +// returns all critical health check entries +module.exports = (req, res) => { + const entries = db.get("critical").orderBy("date", "desc").value(); + + res.send(entries); +}; diff --git a/packages/health-check/src/api/index.js b/packages/health-check/src/api/index.js index 733fb61d..0f70c212 100644 --- a/packages/health-check/src/api/index.js +++ b/packages/health-check/src/api/index.js @@ -2,61 +2,75 @@ const { StatusCodes } = require("http-status-codes"); const { sum, sumBy } = require("lodash"); const db = require("../db"); -// getStatus returns the server's current health check status -function getStatus() { - const disabled = db.get("disabled").value(); +/** + * Get status code that should be returned in the API response. 
+ * - OK (200) in case everything is healthy + * - SERVICE_UNAVAILABLE (503) in case of any failures or if disabled + */ +function getStatusCode() { + // check whether the portal has been manually disabled + const disabled = getDisabled(); if (disabled) { return StatusCodes.SERVICE_UNAVAILABLE; } - // Grab entry element from DB - const entry = db.get("entries").orderBy("date", "desc").head().value(); + // grab one most recent critical entry element from DB + const entry = getCurrentCriticalEntry(); - // Check that every critical check entry is up - if (entry && entry.checks.every(({ up, critical }) => up && critical)) { + // find out whether every check in the entry is up + if (entry && entry.checks.every(({ up }) => up)) { return StatusCodes.OK; } - // At least one check failed + // in case at least one check failed return StatusCodes.SERVICE_UNAVAILABLE; } -// getTimeout returns the average time out from a sample of 10 health check -// entries. -function getTimeout() { - if (getStatus() === StatusCodes.SERVICE_UNAVAILABLE) { - return 0; - } - - // Grab 10 entries from the database as a sample to determine the average - // timeout for the server. 
+/** + * Get the sample of most recent critical entries and + * calculate the average response time of all of them + */ +function getAvarageResponseTime() { + // get most recent 10 successful checks for the calculation const sample = db - .get("entries") + .get("critical") .orderBy("date", "desc") - .filter(({ checks }) => checks.every(({ up, critical }) => up && critical)) + .filter(({ checks }) => checks.every(({ up }) => up)) .take(10) .value(); - // Return average timeout + // calculate average time of response (NOTE(review): sample looks like a plain array after .value(), so sample.size is presumably undefined and this yields NaN - confirm, likely meant sample.length) return Math.round(sum(sample.map(({ checks }) => sumBy(checks, "time"))) / sample.size); } -// getEntriesSinceYesterday gets the health check entries since yesterday -function getEntriesSinceYesterday() { - const yesterday = new Date(); +/** + * Get one, most current critical entry + */ +function getCurrentCriticalEntry() { + return db.get("critical").orderBy("date", "desc").head().value(); +} - yesterday.setDate(yesterday.getDate() - 1); - - return db - .get("entries") - .orderBy("date", "desc") - .filter(({ date }) => date >= yesterday.toISOString()) - .value(); +/** + * Get the disabled flag state (manual portal disable) + */ +function getDisabled() { + return db.get("disabled").value(); } module.exports = (req, res) => { + const statusCode = getStatusCode(); + const timeout = statusCode === StatusCodes.OK ? getAvarageResponseTime() : 0; + + // We want to delay the response for the load balancer to be able to prioritize + // servers based on the successful response time of this endpoint. Load balancer + // will pull the server if the response is an error so there is no point in delaying + // failures, hence 0 timeout on those. 
setTimeout(() => { - res.status(getStatus()).send(getEntriesSinceYesterday()); - }, getTimeout()); + // include some health information in the response body + const entry = getCurrentCriticalEntry(); + const disabled = getDisabled(); + + res.status(statusCode).send({ disabled, entry }); + }, timeout); }; diff --git a/packages/health-check/src/api/recent.js b/packages/health-check/src/api/recent.js deleted file mode 100644 index 600b5814..00000000 --- a/packages/health-check/src/api/recent.js +++ /dev/null @@ -1,16 +0,0 @@ -const db = require("../db"); - -// returns all health check entries that are not older than one day -module.exports = (req, res) => { - const yesterday = new Date(); - - yesterday.setDate(yesterday.getDate() - 1); - - const entries = db - .get("entries") - .orderBy("date", "desc") - .filter(({ date }) => date >= yesterday.toISOString()) - .value(); - - res.send(entries); -}; diff --git a/packages/health-check/src/api/verbose.js b/packages/health-check/src/api/verbose.js new file mode 100644 index 00000000..01a3a666 --- /dev/null +++ b/packages/health-check/src/api/verbose.js @@ -0,0 +1,8 @@ +const db = require("../db"); + +// returns all verbose health check entries +module.exports = (req, res) => { + const entries = db.get("verbose").orderBy("date", "desc").value(); + + res.send(entries); +}; diff --git a/packages/health-check/src/db.js b/packages/health-check/src/db.js index 48a734d6..bb4e7aad 100644 --- a/packages/health-check/src/db.js +++ b/packages/health-check/src/db.js @@ -5,9 +5,9 @@ const Memory = require("lowdb/adapters/Memory"); if (!fs.existsSync("state")) fs.mkdirSync("state"); -const adapter = process.env.NODE_ENV === "production" ? 
new FileSync("state/state.json") : new Memory(); +const adapter = new FileSync("state/state.json"); const db = low(adapter); -db.defaults({ disabled: false, entries: [] }).write(); +db.defaults({ disabled: false, critical: [], verbose: [] }).write(); module.exports = db; diff --git a/packages/health-check/src/index.js b/packages/health-check/src/index.js index 28838845..169e005b 100644 --- a/packages/health-check/src/index.js +++ b/packages/health-check/src/index.js @@ -18,8 +18,8 @@ server.use(bodyparser.urlencoded({ extended: false })); server.use(bodyparser.json()); server.get("/health-check", require("./api/index")); -server.get("/health-check/recent", require("./api/recent")); -server.get("/health-check/all", require("./api/all")); +server.get("/health-check/critical", require("./api/critical")); +server.get("/health-check/verbose", require("./api/verbose")); server.get("/health-check/disabled", require("./api/disabled")); server.listen(port, host, (error) => { diff --git a/packages/health-check/src/schedule.js b/packages/health-check/src/schedule.js index 72dc321b..0901361d 100644 --- a/packages/health-check/src/schedule.js +++ b/packages/health-check/src/schedule.js @@ -4,12 +4,12 @@ const { criticalChecks } = require("./checks/critical"); const { verboseChecks } = require("./checks/verbose"); // execute the critical health-check script every 5 minutes -const basicJob = schedule.scheduleJob("*/5 * * * *", async () => { +const criticalJob = schedule.scheduleJob("*/5 * * * *", async () => { const entry = { date: new Date().toISOString(), checks: [] }; entry.checks = await Promise.all(criticalChecks.map((check) => new Promise(check))); - db.get("entries").push(entry).write(); + db.get("critical").push(entry).write(); }); // execute the verbose health-check script once per hour @@ -18,11 +18,11 @@ const verboseJob = schedule.scheduleJob("0 * * * *", async () => { entry.checks = await Promise.all(verboseChecks.map((check) => new Promise(check))); - 
db.get("entries").push(entry).write(); + db.get("verbose").push(entry).write(); }); // Launch Health check jobs setTimeout(() => { - basicJob.invoke(); + criticalJob.invoke(); verboseJob.invoke(); -}, 60 * 1000); // delay for 60s to give other services time to start up +}, 60 * 0); // NOTE(review): delay is now 0 ms, not the previous 60s that gave other services time to start up - confirm this is intended