From bfc2cbd3f36b66e8d5f31285469e14bb56c23005 Mon Sep 17 00:00:00 2001 From: Karol Wypchlo Date: Fri, 10 Sep 2021 16:19:24 +0200 Subject: [PATCH 1/3] make disabled health-check require reason --- packages/health-check/cli | 80 +++++++++++++++++++++++++++++++ packages/health-check/cli/disable | 7 --- packages/health-check/cli/enable | 7 --- packages/health-check/cli/run | 5 -- packages/health-check/src/run.js | 56 ---------------------- 5 files changed, 80 insertions(+), 75 deletions(-) create mode 100755 packages/health-check/cli delete mode 100755 packages/health-check/cli/disable delete mode 100755 packages/health-check/cli/enable delete mode 100755 packages/health-check/cli/run delete mode 100644 packages/health-check/src/run.js diff --git a/packages/health-check/cli b/packages/health-check/cli new file mode 100755 index 00000000..8e7ee861 --- /dev/null +++ b/packages/health-check/cli @@ -0,0 +1,80 @@ +#!/usr/bin/env node + +process.env.NODE_ENV = process.env.NODE_ENV || "production"; + +require("yargs/yargs")(process.argv.slice(2)) + .command( + "enable", + "Mark portal as enabled", + () => {}, + () => { + const db = require("./src/db"); + + db.set("disabled", false).write(); + } + ) + .command( + "disable <reason>", + "Mark portal as disabled (provide meaningful reason)", + () => {}, + ({ reason }) => { + const db = require("./src/db"); + + db.set("disabled", reason).write(); + } + ) + .command( + "run <type>", + "Skynet portal health checks", + (yargs) => { + yargs + .positional("type", { + describe: "Type of checks to run", + type: "string", + choices: ["critical", "extended"], + }) + .option("portal-url", { + describe: "Skynet portal url", + default: process.env.SKYNET_PORTAL_API || "https://siasky.net", + type: "string", + }) + .option("state-dir", { + describe: "State directory", + default: process.env.STATE_DIR || "state", + type: "string", + }); + }, + async ({ type, portalUrl, stateDir }) => { + process.env.SKYNET_PORTAL_API = portalUrl; + process.env.STATE_DIR = 
stateDir; + + const util = require("util"); + const { getYesterdayISOString } = require("./src/utils"); + const createMiddleware = require("./src/checks/middleware"); + const db = require("./src/db"); + const checks = require(`./src/checks/${type}`); + const middleware = await createMiddleware(); + + const entry = { + date: new Date().toISOString(), + checks: (await Promise.all(checks.map((check) => new Promise(check)))).map(middleware), + }; + + db.read() // read before writing to make sure no external changes are overwritten + .get(type) // get the list of records of given type + .push(entry) // insert new record + .remove(({ date }) => date < getYesterdayISOString()) // drop old records + .write(); + + // exit with code 1 if any of the checks report failure + if (entry.checks.some(({ up }) => !up)) { + console.log( + util.inspect( + entry.checks.filter(({ up }) => !up), + { colors: true, depth: 7 } // increase depth to ensure errors are printed + ) + ); + process.exit(1); + } + } + ).argv; diff --git a/packages/health-check/cli/disable b/packages/health-check/cli/disable deleted file mode 100755 index 05736671..00000000 --- a/packages/health-check/cli/disable +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env node - -process.env.NODE_ENV = process.env.NODE_ENV || "production"; - -const db = require("../src/db"); - -db.set("disabled", true).write(); diff --git a/packages/health-check/cli/enable b/packages/health-check/cli/enable deleted file mode 100755 index 13cc1341..00000000 --- a/packages/health-check/cli/enable +++ /dev/null @@ -1,7 +0,0 @@ -#!/usr/bin/env node - -process.env.NODE_ENV = process.env.NODE_ENV || "production"; - -const db = require("../src/db"); - -db.set("disabled", false).write(); diff --git a/packages/health-check/cli/run b/packages/health-check/cli/run deleted file mode 100755 index a196b31b..00000000 --- a/packages/health-check/cli/run +++ /dev/null @@ -1,5 +0,0 @@ -#!/usr/bin/env node - -process.env.NODE_ENV = process.env.NODE_ENV || 
"production"; - -require("../src/run.js"); diff --git a/packages/health-check/src/run.js b/packages/health-check/src/run.js deleted file mode 100644 index d0606f47..00000000 --- a/packages/health-check/src/run.js +++ /dev/null @@ -1,56 +0,0 @@ -const util = require("util"); -const { getYesterdayISOString } = require("./utils"); -const createMiddleware = require("./checks/middleware"); - -require("yargs/yargs")(process.argv.slice(2)).command( - "$0 <type>", - "Skynet portal health checks", - (yargs) => { - yargs - .positional("type", { - describe: "Type of checks to run", - type: "string", - choices: ["critical", "extended"], - }) - .option("portal-url", { - describe: "Skynet portal url", - default: process.env.SKYNET_PORTAL_API || "https://siasky.net", - type: "string", - }) - .option("state-dir", { - describe: "State directory", - default: process.env.STATE_DIR || "state", - type: "string", - }); - }, - async ({ type, portalUrl, stateDir }) => { - process.env.SKYNET_PORTAL_API = portalUrl; - process.env.STATE_DIR = stateDir; - - const db = require("../src/db"); - const checks = require(`../src/checks/${type}`); - const middleware = await createMiddleware(); - - const entry = { - date: new Date().toISOString(), - checks: (await Promise.all(checks.map((check) => new Promise(check)))).map(middleware), - }; - - db.read() // read before writing to make sure no external changes are overwritten - .get(type) // get the list of records of given type - .push(entry) // insert new record - .remove(({ date }) => date < getYesterdayISOString()) // drop old records - .write(); - - // exit with code 1 if any of the checks report failure - if (entry.checks.some(({ up }) => !up)) { - console.log( - util.inspect( - entry.checks.filter(({ up }) => !up), - { colors: true, depth: 7 } // increase depth to ensure errors are printed - ) - ); - process.exit(1); - } - } -).argv; From 4fddbb2fc88bf987d32c8b8b18c83c3a40e7a346 Mon Sep 17 00:00:00 2001 From: Karol Wypchlo Date: Fri, 10 Sep 2021 
16:23:05 +0200 Subject: [PATCH 2/3] do not notify on portal disabled --- setup-scripts/health-checker.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/setup-scripts/health-checker.py b/setup-scripts/health-checker.py index 42df82ef..8d67e4e1 100755 --- a/setup-scripts/health-checker.py +++ b/setup-scripts/health-checker.py @@ -201,9 +201,7 @@ async def check_health(): message = "" force_notify = False - if json_check["disabled"]: - message += "__Portal manually disabled!__ " - elif server_down: + if server_down and json_check["disabled"] == False: message += "__Portal down!!!__ " force_notify = True From bcea4d5b9043ebd76ed0c0bad60b62f61c6467ce Mon Sep 17 00:00:00 2001 From: Karol Wypchlo Date: Fri, 10 Sep 2021 17:00:37 +0200 Subject: [PATCH 3/3] do not send message on server down --- setup-scripts/health-checker.py | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/setup-scripts/health-checker.py b/setup-scripts/health-checker.py index 8d67e4e1..8ce62a54 100755 --- a/setup-scripts/health-checker.py +++ b/setup-scripts/health-checker.py @@ -133,7 +133,7 @@ async def check_health(): res = requests.get(endpoint + "/health-check", verify=False) json_check = res.json() - server_down = res.status_code is not requests.codes["ok"] + server_failure = res.status_code is not requests.codes["ok"] and json_check["disabled"] == False res = requests.get(endpoint + "/health-check/critical", verify=False) json_critical = res.json() @@ -201,8 +201,8 @@ async def check_health(): message = "" force_notify = False - if server_down and json_check["disabled"] == False: - message += "__Portal down!!!__ " + if server_failure: + message += "__Server down!!!__ " force_notify = True if critical_checks_failed: @@ -227,7 +227,6 @@ async def check_health(): # send a message if we force notification, there is a failures dump or just once daily (heartbeat) on 1 AM if ( force_notify - or json_check["disabled"] or failed_records_file or 
datetime.utcnow().hour == 1 ):