Merge pull request #1179 from SkynetLabs/disabled-with-reason

Make disabling the health-check require a reason
This commit is contained in:
Karol Wypchło 2021-09-10 19:01:59 +02:00 committed by GitHub
commit 49d5906464
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
6 changed files with 83 additions and 81 deletions

80
packages/health-check/cli Executable file
View File

@ -0,0 +1,80 @@
#!/usr/bin/env node
// Command line entry point of the health-check package. It exposes three commands:
//   enable            - stores `false` under the "disabled" key of the db
//   disable <reason>  - stores the given reason under the "disabled" key of the db
//   run <type>        - runs the checks of the given type and records their results

process.env.NODE_ENV = process.env.NODE_ENV || "production";

require("yargs/yargs")(process.argv.slice(2))
  .command(
    "enable",
    "Mark portal as enabled",
    () => {},
    () => {
      const db = require("./src/db");
      db.set("disabled", false).write();
    }
  )
  .command(
    "disable <reason>",
    "Mark portal as disabled (provide meaningful reason)",
    (yargs) => {
      yargs.positional("reason", {
        describe: "Reason for disabling the portal",
        // force string parsing so that a numeric looking reason (ie. "0") is not turned into a number
        type: "string",
        // `enable` stores `false` under the same key, so a blank reason would be falsy as well and
        // any consumer testing the truthiness of "disabled" could not tell the two states apart
        coerce: (reason) => {
          const trimmed = String(reason).trim();
          if (trimmed === "") {
            throw new Error("Reason for disabling the portal must not be empty");
          }
          return trimmed;
        },
      });
    },
    ({ reason }) => {
      const db = require("./src/db");
      db.set("disabled", reason).write();
    }
  )
  .command(
    "run <type>",
    "Skynet portal health checks",
    (yargs) => {
      yargs
        .positional("type", {
          describe: "Type of checks to run",
          type: "string",
          choices: ["critical", "extended"],
        })
        .option("portal-url", {
          describe: "Skynet portal url",
          default: process.env.SKYNET_PORTAL_API || "https://siasky.net",
          type: "string",
        })
        .option("state-dir", {
          describe: "State directory",
          default: process.env.STATE_DIR || "state",
          type: "string",
        });
    },
    async ({ type, portalUrl, stateDir }) => {
      // export the resolved options before any project module is required
      // NOTE(review): presumably the modules below read these variables at load time - confirm
      process.env.SKYNET_PORTAL_API = portalUrl;
      process.env.STATE_DIR = stateDir;

      const util = require("util");
      const { getYesterdayISOString } = require("./src/utils");
      const createMiddleware = require("./src/checks/middleware");
      const db = require("./src/db");
      const checks = require(`./src/checks/${type}`);

      const middleware = await createMiddleware();
      const entry = {
        date: new Date().toISOString(),
        // every check is used as a promise executor, all of them are started at once
        checks: (await Promise.all(checks.map((check) => new Promise(check)))).map(middleware),
      };

      db.read() // read before writing to make sure no external changes are overwritten
        .get(type) // get the list of records of given type
        .push(entry) // insert new record
        .remove(({ date }) => date < getYesterdayISOString()) // drop old records
        .write();

      // exit with code 1 if any of the checks report failure
      if (entry.checks.some(({ up }) => !up)) {
        console.log(
          util.inspect(
            entry.checks.filter(({ up }) => !up),
            { colors: true, depth: 7 } // increase depth to ensure errors are printed
          )
        );
        process.exit(1);
      }
    }
  ).argv;

View File

@ -1,7 +0,0 @@
#!/usr/bin/env node
// Stores `true` under the "disabled" key of the db and persists it.

// fall back to production when NODE_ENV is missing or empty (has to happen before the db is required)
if (!process.env.NODE_ENV) {
  process.env.NODE_ENV = "production";
}

const store = require("../src/db");
const disabledFlag = store.set("disabled", true);
disabledFlag.write();

View File

@ -1,7 +0,0 @@
#!/usr/bin/env node
// Stores `false` under the "disabled" key of the db and persists it.

// fall back to production when NODE_ENV is missing or empty (has to happen before the db is required)
if (!process.env.NODE_ENV) {
  process.env.NODE_ENV = "production";
}

const store = require("../src/db");
const disabledFlag = store.set("disabled", false);
disabledFlag.write();

View File

@ -1,5 +0,0 @@
#!/usr/bin/env node
// Thin launcher: defaults NODE_ENV and hands over to src/run.js, which does its work on load.

// fall back to production when NODE_ENV is missing or empty (has to happen before run.js is loaded)
if (!process.env.NODE_ENV) {
  process.env.NODE_ENV = "production";
}

require("../src/run.js");

View File

@ -1,56 +0,0 @@
const util = require("util");
const { getYesterdayISOString } = require("./utils");
const createMiddleware = require("./checks/middleware");

/**
 * Declares the positional argument and the options accepted by the default command.
 */
const describeArguments = (yargs) => {
  yargs
    .positional("type", {
      describe: "Type of checks to run",
      type: "string",
      choices: ["critical", "extended"],
    })
    .option("portal-url", {
      describe: "Skynet portal url",
      default: process.env.SKYNET_PORTAL_API || "https://siasky.net",
      type: "string",
    })
    .option("state-dir", {
      describe: "State directory",
      default: process.env.STATE_DIR || "state",
      type: "string",
    });
};

/**
 * Tells whether a stored record is dated before the value returned by getYesterdayISOString.
 */
const isOutdated = ({ date }) => date < getYesterdayISOString();

/**
 * Tells whether a single check result reports a failure (falsy `up`).
 */
const isDown = ({ up }) => !up;

/**
 * Runs all checks of the requested type, appends the results to the db and
 * terminates the process with exit code 1 when at least one check is down.
 */
async function runChecks({ type, portalUrl, stateDir }) {
  // export the resolved options before the db and the checks are required
  process.env.SKYNET_PORTAL_API = portalUrl;
  process.env.STATE_DIR = stateDir;

  const db = require("../src/db");
  const checks = require(`../src/checks/${type}`);
  const middleware = await createMiddleware();

  // the timestamp is taken before the checks are started
  const date = new Date().toISOString();
  // every check is used as a promise executor, all of them are started at once
  const pending = checks.map((check) => new Promise(check));
  const results = await Promise.all(pending);
  const entry = { date, checks: results.map(middleware) };

  // read before writing to make sure no external changes are overwritten
  const records = db.read().get(type);
  records
    .push(entry) // insert new record
    .remove(isOutdated) // drop old records
    .write();

  const failures = entry.checks.filter(isDown);
  if (failures.length === 0) {
    return;
  }

  // increased depth ensures that errors are printed
  console.log(util.inspect(failures, { colors: true, depth: 7 }));
  process.exit(1);
}

require("yargs/yargs")(process.argv.slice(2)).command(
  "$0 <type>",
  "Skynet portal health checks",
  describeArguments,
  runChecks
).argv;

View File

@ -133,7 +133,7 @@ async def check_health():
res = requests.get(endpoint + "/health-check", verify=False) res = requests.get(endpoint + "/health-check", verify=False)
json_check = res.json() json_check = res.json()
server_down = res.status_code is not requests.codes["ok"] server_failure = res.status_code is not requests.codes["ok"] and json_check["disabled"] == False:
res = requests.get(endpoint + "/health-check/critical", verify=False) res = requests.get(endpoint + "/health-check/critical", verify=False)
json_critical = res.json() json_critical = res.json()
@ -201,10 +201,8 @@ async def check_health():
message = "" message = ""
force_notify = False force_notify = False
if json_check["disabled"]: if server_failure:
message += "__Portal manually disabled!__ " message += "__Server down!!!__ "
elif server_down:
message += "__Portal down!!!__ "
force_notify = True force_notify = True
if critical_checks_failed: if critical_checks_failed:
@ -229,7 +227,6 @@ async def check_health():
# send a message if we force notification, there is a failures dump or just once daily (heartbeat) on 1 AM # send a message if we force notification, there is a failures dump or just once daily (heartbeat) on 1 AM
if ( if (
force_notify force_notify
or json_check["disabled"]
or failed_records_file or failed_records_file
or datetime.utcnow().hour == 1 or datetime.utcnow().hour == 1
): ):