Merge pull request #1179 from SkynetLabs/disabled-with-reason
make disabled health-check require reason
This commit is contained in:
commit
49d5906464
|
@ -0,0 +1,80 @@
|
||||||
|
#!/usr/bin/env node

// Fall back to "production" when the caller has not selected an environment.
if (!process.env.NODE_ENV) {
  process.env.NODE_ENV = "production";
}

/** Builder shared by the commands that declare nothing beyond their positionals. */
function noExtraOptions() {}

/** Handler for `enable`: stores `disabled = false` through the db module. */
function enablePortal() {
  // the db module is loaded lazily, only once the command actually runs
  require("./src/db").set("disabled", false).write();
}

/**
 * Handler for `disable <reason>`: stores the given reason itself as the
 * value of `disabled`.
 *
 * @param {object} argv parsed arguments; only the `reason` positional is read
 */
function disablePortal({ reason }) {
  require("./src/db").set("disabled", reason).write();
}

/**
 * Builder for `run <type>`: declares the `type` positional and the
 * `portal-url` / `state-dir` options, each defaulting to its environment
 * variable before falling back to a literal.
 *
 * @param {object} yargs yargs instance handed to the command builder
 */
function describeRunOptions(yargs) {
  yargs.positional("type", {
    describe: "Type of checks to run",
    type: "string",
    choices: ["critical", "extended"],
  });
  yargs.option("portal-url", {
    describe: "Skynet portal url",
    default: process.env.SKYNET_PORTAL_API || "https://siasky.net",
    type: "string",
  });
  yargs.option("state-dir", {
    describe: "State directory",
    default: process.env.STATE_DIR || "state",
    type: "string",
  });
}

/**
 * Handler for `run <type>`: runs every check exported by `./src/checks/<type>`,
 * appends the outcome to the `<type>` list in the db, drops records dated
 * before `getYesterdayISOString()` and exits with code 1 when at least one
 * check result has a falsy `up`.
 *
 * @param {object} argv parsed arguments (`type`, `portalUrl`, `stateDir`)
 * @returns {Promise<void>}
 */
async function runChecks({ type, portalUrl, stateDir }) {
  // Export the settings before the project modules below are loaded
  // (presumably they read these variables at load time - confirm).
  process.env.SKYNET_PORTAL_API = portalUrl;
  process.env.STATE_DIR = stateDir;

  const util = require("util");
  const { getYesterdayISOString } = require("./src/utils");
  const createMiddleware = require("./src/checks/middleware");
  const db = require("./src/db");
  const checks = require(`./src/checks/${type}`);
  const middleware = await createMiddleware();

  // the timestamp is taken before the checks start running
  const startedAt = new Date().toISOString();
  // each exported check is used as a Promise executor; all of them run concurrently
  const settled = await Promise.all(checks.map((check) => new Promise(check)));
  const entry = { date: startedAt, checks: settled.map(middleware) };

  db.read() // read before writing to make sure no external changes are overwritten
    .get(type) // list of records of the given type
    .push(entry) // append the new record
    .remove(({ date: recordedAt }) => recordedAt < getYesterdayISOString()) // drop old records
    .write();

  // report the failing checks and exit with code 1 if there are any
  const failing = entry.checks.filter(({ up }) => !up);
  if (failing.length > 0) {
    // increase depth to ensure errors are printed
    console.log(util.inspect(failing, { colors: true, depth: 7 }));
    process.exit(1);
  }
}

require("yargs/yargs")(process.argv.slice(2))
  .command("enable", "Mark portal as enabled", noExtraOptions, enablePortal)
  .command(
    "disable <reason>",
    "Mark portal as disabled (provide meaningful reason)",
    noExtraOptions,
    disablePortal
  )
  .command("run <type>", "Skynet portal health checks", describeRunOptions, runChecks).argv;
|
|
@ -1,7 +0,0 @@
|
||||||
#!/usr/bin/env node

// Fall back to "production" when the caller has not selected an environment.
if (!process.env.NODE_ENV) {
  process.env.NODE_ENV = "production";
}

// Store `disabled = true` through the db module.
require("../src/db").set("disabled", true).write();
|
|
|
@ -1,7 +0,0 @@
|
||||||
#!/usr/bin/env node

// Fall back to "production" when the caller has not selected an environment.
if (!process.env.NODE_ENV) {
  process.env.NODE_ENV = "production";
}

// Store `disabled = false` through the db module.
require("../src/db").set("disabled", false).write();
|
|
|
@ -1,5 +0,0 @@
|
||||||
#!/usr/bin/env node

// Fall back to "production" when the caller has not selected an environment.
if (!process.env.NODE_ENV) {
  process.env.NODE_ENV = "production";
}

// Loading the module is all this launcher does; run.js parses the CLI arguments itself.
require("../src/run.js");
|
|
|
@ -1,56 +0,0 @@
|
||||||
const util = require("util");
|
|
||||||
const { getYesterdayISOString } = require("./utils");
|
|
||||||
const createMiddleware = require("./checks/middleware");
|
|
||||||
|
|
||||||
/**
 * Builder for the default command: declares the `type` positional and the
 * `portal-url` / `state-dir` options, each defaulting to its environment
 * variable before falling back to a literal.
 *
 * @param {object} yargs yargs instance handed to the command builder
 */
function describeOptions(yargs) {
  yargs.positional("type", {
    describe: "Type of checks to run",
    type: "string",
    choices: ["critical", "extended"],
  });
  yargs.option("portal-url", {
    describe: "Skynet portal url",
    default: process.env.SKYNET_PORTAL_API || "https://siasky.net",
    type: "string",
  });
  yargs.option("state-dir", {
    describe: "State directory",
    default: process.env.STATE_DIR || "state",
    type: "string",
  });
}

/**
 * Handler for the default command: runs every check exported by the module
 * for the given type, appends the outcome to the `<type>` list in the db,
 * drops records dated before `getYesterdayISOString()` and exits with code 1
 * when at least one check result has a falsy `up`.
 *
 * @param {object} argv parsed arguments (`type`, `portalUrl`, `stateDir`)
 * @returns {Promise<void>}
 */
async function executeChecks({ type, portalUrl, stateDir }) {
  // export the settings before the db and checks modules are loaded
  process.env.SKYNET_PORTAL_API = portalUrl;
  process.env.STATE_DIR = stateDir;

  const db = require("../src/db");
  const checks = require(`../src/checks/${type}`);
  const middleware = await createMiddleware();

  // the timestamp is taken before the checks start running
  const startedAt = new Date().toISOString();
  // each exported check is used as a Promise executor; all of them run concurrently
  const settled = await Promise.all(checks.map((check) => new Promise(check)));
  const entry = { date: startedAt, checks: settled.map(middleware) };

  db.read() // read before writing to make sure no external changes are overwritten
    .get(type) // list of records of the given type
    .push(entry) // append the new record
    .remove(({ date: recordedAt }) => recordedAt < getYesterdayISOString()) // drop old records
    .write();

  // report the failing checks and exit with code 1 if there are any
  const failing = entry.checks.filter(({ up }) => !up);
  if (failing.length > 0) {
    // increase depth to ensure errors are printed
    console.log(util.inspect(failing, { colors: true, depth: 7 }));
    process.exit(1);
  }
}

require("yargs/yargs")(process.argv.slice(2)).command(
  "$0 <type>",
  "Skynet portal health checks",
  describeOptions,
  executeChecks
).argv;
|
|
|
@ -133,7 +133,7 @@ async def check_health():
|
||||||
res = requests.get(endpoint + "/health-check", verify=False)
|
res = requests.get(endpoint + "/health-check", verify=False)
|
||||||
json_check = res.json()
|
json_check = res.json()
|
||||||
|
|
||||||
server_down = res.status_code is not requests.codes["ok"]
|
# A server failure is a non-OK response while the portal is not flagged as
# disabled. The status code is compared by value (!=) rather than identity, and
# the stray trailing colon that made this assignment a syntax error is removed.
server_failure = res.status_code != requests.codes["ok"] and json_check["disabled"] == False
|
||||||
|
|
||||||
res = requests.get(endpoint + "/health-check/critical", verify=False)
|
res = requests.get(endpoint + "/health-check/critical", verify=False)
|
||||||
json_critical = res.json()
|
json_critical = res.json()
|
||||||
|
@ -201,10 +201,8 @@ async def check_health():
|
||||||
message = ""
|
message = ""
|
||||||
force_notify = False
|
force_notify = False
|
||||||
|
|
||||||
if json_check["disabled"]:
|
if server_failure:
|
||||||
message += "__Portal manually disabled!__ "
|
message += "__Server down!!!__ "
|
||||||
elif server_down:
|
|
||||||
message += "__Portal down!!!__ "
|
|
||||||
force_notify = True
|
force_notify = True
|
||||||
|
|
||||||
if critical_checks_failed:
|
if critical_checks_failed:
|
||||||
|
@ -229,7 +227,6 @@ async def check_health():
|
||||||
# send a message if we force notification, there is a failures dump or just once daily (heartbeat) on 1 AM
|
# send a message if we force notification, there is a failures dump or just once daily (heartbeat) on 1 AM
|
||||||
if (
|
if (
|
||||||
force_notify
|
force_notify
|
||||||
or json_check["disabled"]
|
|
||||||
or failed_records_file
|
or failed_records_file
|
||||||
or datetime.utcnow().hour == 1
|
or datetime.utcnow().hour == 1
|
||||||
):
|
):
|
||||||
|
|
Reference in New Issue