From 4b1ac49ef19b1e46b4bf9f6a6b162c4cacb64b1b Mon Sep 17 00:00:00 2001 From: Marcin Jachymiak Date: Mon, 9 Mar 2020 11:38:23 -0400 Subject: [PATCH 1/5] Refactor bot_utils out of health-checker --- setup-scripts/bot_utils.py | 127 ++++++++++++++++++++++++++++ setup-scripts/health-checker.py | 143 +++++--------------------------- 2 files changed, 147 insertions(+), 123 deletions(-) create mode 100644 setup-scripts/bot_utils.py diff --git a/setup-scripts/bot_utils.py b/setup-scripts/bot_utils.py new file mode 100644 index 00000000..4e174097 --- /dev/null +++ b/setup-scripts/bot_utils.py @@ -0,0 +1,127 @@ +#!/usr/bin/env python3 + +from urllib.request import urlopen, Request +from dotenv import load_dotenv +from pathlib import Path + +import urllib, json, os, traceback, discord, sys + +# sc_precision is the number of hastings per siacoin +sc_precision = 10 ** 24 + +channel_name = "skynet-portal-health-check" + +# Environment variable globals +api_endpoint, port, portal_name, bot_token, password = None, None, None, None, None +discord_client = None +setup_done = False + +def setup(): + # Load dotenv file if possible. + if len(sys.argv) > 1: + env_path = Path(sys.argv[1]) + load_dotenv(dotenv_path=env_path, override=True) + + global bot_token + bot_token = os.environ["DISCORD_BOT_TOKEN"] + + global portal_name + portal_name = os.getenv("PORTAL_NAME") + + # Get a port or use default + global port + port = os.getenv("API_PORT") + if not port: + port = "9980" + + global api_endpoint + api_endpoint = "http://localhost:{}".format(port) + + siad.initialize() + + global setup_done + setup_done = True + + return bot_token + +# send_msg sends the msg to the specified discord channel. If force_notify is set to true it adds "@here". 
+async def send_msg(client, msg, force_notify=False): + await client.wait_until_ready() + + guild = client.guilds[0] + channels = guild.channels + + chan = None + for c in channels: + if c.name == channel_name: + chan = c + + if chan is None: + print("Can't find channel {}".format(channel_name)) + + # Add the portal name. + msg = "`{}`: {}".format(portal_name, msg) + + if force_notify: + msg = "@here: \n{}".format(msg) + await chan.send(msg) + + +#siad class provides wrappers for the necessary siad commands. +class siad: + # initializes values for using the API (password and + # user-agent) so that all calls to urllib.request.urlopen have these set. + @staticmethod + def initialize(): + # Setup a handler with the API password + username = "" + password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm() + password_mgr.add_password(None, api_endpoint, username, siad.get_password()) + handler = urllib.request.HTTPBasicAuthHandler(password_mgr) + + # Setup an opener with the correct user agent + opener = urllib.request.build_opener(handler) + opener.addheaders = [('User-agent', 'Sia-Agent')] + + # Install the opener. + # Now all calls to urllib.request.urlopen use our opener. 
+ urllib.request.install_opener(opener) + + @staticmethod + def get_password(): + # Get a port or use default + password = os.getenv("SIA_API_PASSWORD") + if not password: + home = os.getenv("HOME") + password_file = open(home+"/.sia/apipassword") + password = password_file.readlines()[0].strip() + return password + + # load_json reads the http response and decodes the JSON value + @staticmethod + def load_json(resp): + return json.loads(resp.decode("utf-8")) + + + @staticmethod + def get_wallet(): + if not setup_done: setup() + + resp = urllib.request.urlopen(api_endpoint + "/wallet").read() + return siad.load_json(resp) + + + @staticmethod + def get_renter(): + if not setup_done: setup() + + resp = urllib.request.urlopen(api_endpoint + "/renter").read() + return siad.load_json(resp) + + + @staticmethod + def get_renter_contracts(): + if not setup_done: setup() + + resp = urllib.request.urlopen(api_endpoint + "/renter/contracts").read() + return siad.load_json(resp) diff --git a/setup-scripts/health-checker.py b/setup-scripts/health-checker.py index c36b0312..513500b5 100755 --- a/setup-scripts/health-checker.py +++ b/setup-scripts/health-checker.py @@ -1,118 +1,25 @@ #!/usr/bin/env python3 -from urllib.request import urlopen, Request -from dotenv import load_dotenv -from pathlib import Path +import discord +from bot_utils import setup, send_msg, siad, sc_precision -import urllib, json, os, traceback, discord, sys - - -# sc_precision is the number of hastings per siacoin -sc_precision = 10 ** 24 - -# Environment variable globals -api_endpoint, port, portal_name, bot_token, password = None, None, None, None, None - -# Load dotenv file if possible. 
-if len(sys.argv) > 1: - env_path = Path(sys.argv[1]) - load_dotenv(dotenv_path=env_path, override=True) - -bot_token = os.environ["DISCORD_BOT_TOKEN"] -portal_name = os.getenv("PORTAL_NAME") - -# Get a port or use default -port = os.getenv("API_PORT") -if not port: - port = "9980" - -api_endpoint = "http://localhost:{}".format(port) - - -# Discord bot initialization +bot_token = setup() client = discord.Client() -channel_name = "skynet-portal-health-check" @client.event async def on_ready(): await run_checks() await client.close() -# send_msg sends the msg to the specified discord channel. If force_notify is set to true it adds "@here". -async def send_msg(msg, force_notify=False): - await client.wait_until_ready() - guild = client.guilds[0] - channels = guild.channels +async def run_checks(): + print("Running Skynet portal health checks") + try: + await check_health() - chan = None - for c in channels: - if c.name == channel_name: - chan = c - - if chan is None: - print("Can't find channel {}".format(channel_name)) - - # Add the portal name. - msg = "`{}`: {}".format(portal_name, msg) - - if force_notify: - msg = "@here: \n{}".format(msg) - await chan.send(msg) - -#siac class provides wrappers for the necessary siac commands. -class siac: - # initializes values for using the API (password and - # user-agent) so that all calls to urllib.request.urlopen have these set. - @staticmethod - def initialize(): - # Setup a handler with the API password - username = "" - password_mgr = urllib.request.HTTPPasswordMgrWithDefaultRealm() - password_mgr.add_password(None, api_endpoint, username, siac.get_password()) - handler = urllib.request.HTTPBasicAuthHandler(password_mgr) - - # Setup an opener with the correct user agent - opener = urllib.request.build_opener(handler) - opener.addheaders = [('User-agent', 'Sia-Agent')] - - # Install the opener. - # Now all calls to urllib.request.urlopen use our opener. 
- urllib.request.install_opener(opener) - - @staticmethod - def get_password(): - # Get a port or use default - password = os.getenv("SIA_API_PASSWORD") - if not password: - home = os.getenv("HOME") - password_file = open(home+"/.sia/apipassword") - password = password_file.readlines()[0].strip() - - return password - - # load_json reads the http response and decodes the JSON value - @staticmethod - def load_json(resp): - return json.loads(resp.decode("utf-8")) - - - @staticmethod - def get_wallet(): - resp = urllib.request.urlopen(api_endpoint + "/wallet").read() - return siac.load_json(resp) - - - @staticmethod - def get_renter(): - resp = urllib.request.urlopen(api_endpoint + "/renter").read() - return siac.load_json(resp) - - - @staticmethod - def get_renter_contracts(): - resp = urllib.request.urlopen(api_endpoint + "/renter/contracts").read() - return siac.load_json(resp) + except: # catch all exceptions + trace = traceback.format_exc() + await send_msg(client, "```\n{}\n```".format(trace), force_notify=True) # check_health checks that the wallet is unlocked, that it has at least 1 @@ -120,11 +27,11 @@ class siac: # all checks pass it sends a informational message. 
async def check_health(): print("\nChecking health...") - wallet_get = siac.get_wallet() - renter_get = siac.get_renter() + wallet_get = siad.get_wallet() + renter_get = siad.get_renter() if not wallet_get['unlocked']: - await send_msg("Wallet locked", force_notify=True) + await send_msg(client, "Wallet locked", force_notify=True) return confirmed_coins = int(wallet_get['confirmedsiacoinbalance']) @@ -138,31 +45,21 @@ async def check_health(): allocated_funds = int(renter_get['financialmetrics']['totalallocated']) unallocated_funds = allowance_funds - allocated_funds + + balance_msg = "Balance: `{} SC` Allowance Funds: `{} SC`".format(round(balance/sc_precision), round(allowance_funds/sc_precision)) + alloc_msg = "Unallocated: `{} SC`\nAllocated: `{} SC`".format(round(unallocated_funds/sc_precision), round(allocated_funds/sc_precision)) + # Send an alert if there is less than 1 allowance worth of money left. if balance < allowance_funds: - await send_msg("Wallet balance running low. Balance: `{} SC` Allowance Funds: `{} SC`".format(round(balance/sc_precision), round(allowance_funds/sc_precision)), force_notify=True) + await send_msg(client, "Wallet balance running low. \n{}`".format(balance_msg), force_notify=True) return # Alert devs when 1/2 the allowance is gone if allocated_funds >= unallocated_funds: - await send_msg("Allowance half spent: \nUnallocated: `{} SC`\nAllocated: `{} SC`".format(round(unallocated_funds/sc_precision), round(allocated_funds/sc_precision)), force_notify=True) + await send_msg(client, "Allowance half spent: \n{}".format(alloc_msg), force_notify=True) return # Send an informational heartbeat if all checks passed. - pretty_renter_get = json.dumps(siac.get_renter(), indent=4) - await send_msg("Health checks passed:\n\nWallet Balance: `{} SC`\n\n Renter Info:\n```\n{}\n```".format(round(balance/sc_precision), pretty_renter_get)) - - -async def run_checks(): - # Initialize the siac API helper. 
- siac.initialize() - - print("Running Skynet portal health checks") - try: - await check_health() - - except: # catch all exceptions - trace = traceback.format_exc() - await send_msg("```\n{}\n```".format(trace), force_notify=True) + await send_msg(client, "Health checks passed:\n{} \n{}".format(balance_msg, alloc_msg)) client.run(bot_token) From 6d8fe9ca02cff39c9679303fb9eee20a3fad3f26 Mon Sep 17 00:00:00 2001 From: Marcin Jachymiak Date: Mon, 9 Mar 2020 14:54:41 -0400 Subject: [PATCH 2/5] Add log checker --- setup-scripts/bot_utils.py | 4 +- setup-scripts/health-checker.py | 7 +++- setup-scripts/log-checker.py | 70 +++++++++++++++++++++++++++++++++ 3 files changed, 78 insertions(+), 3 deletions(-) create mode 100755 setup-scripts/log-checker.py diff --git a/setup-scripts/bot_utils.py b/setup-scripts/bot_utils.py index 4e174097..91d20589 100644 --- a/setup-scripts/bot_utils.py +++ b/setup-scripts/bot_utils.py @@ -45,7 +45,7 @@ def setup(): return bot_token # send_msg sends the msg to the specified discord channel. If force_notify is set to true it adds "@here". -async def send_msg(client, msg, force_notify=False): +async def send_msg(client, msg, force_notify=False, file=None): await client.wait_until_ready() guild = client.guilds[0] @@ -64,7 +64,7 @@ async def send_msg(client, msg, force_notify=False): if force_notify: msg = "@here: \n{}".format(msg) - await chan.send(msg) + await chan.send(msg, file=file) #siad class provides wrappers for the necessary siad commands. diff --git a/setup-scripts/health-checker.py b/setup-scripts/health-checker.py index 513500b5..a9d80042 100755 --- a/setup-scripts/health-checker.py +++ b/setup-scripts/health-checker.py @@ -1,6 +1,11 @@ #!/usr/bin/env python3 -import discord +""" +health-checker runs simple health checks on a portal node using the siad API and +dispatches messages to a Discord channel. 
+""" + +import discord, traceback from bot_utils import setup, send_msg, siad, sc_precision bot_token = setup() diff --git a/setup-scripts/log-checker.py b/setup-scripts/log-checker.py new file mode 100755 index 00000000..c15f527a --- /dev/null +++ b/setup-scripts/log-checker.py @@ -0,0 +1,70 @@ +#!/usr/bin/env python3 + +import discord, sys, traceback, io +from bot_utils import setup, send_msg, sc_precision + +from datetime import datetime, timedelta +from subprocess import Popen, PIPE + +""" +log-checker checks journal logs for siad. + +Arguments: + 1. path to a .env file (default is none so env variables can already be + preset) + +""" + +DEFAULT_CHECK_INTERVAL = timedelta(hours=1) + +bot_token = setup() +client = discord.Client() + +@client.event +async def on_ready(): + await run_checks() + await client.close() + + +async def run_checks(): + print("Running Skynet portal log checks") + try: + await check_journal() + + except: # catch all exceptions + trace = traceback.format_exc() + await send_msg(client, "```\n{}\n```".format(trace), force_notify=False) + + +# check_journal checks the journal +async def check_journal(): + print("\nChecking journal...") + + now = datetime.now() + time = now - DEFAULT_CHECK_INTERVAL + time_string = "{}-{}-{} {}:{}:{}".format(time.year, time.month, time.day, time.hour, time.minute, time.second) + + # Get the systemd service name as an argument, or use "siad" as default. + service_name = "siad" + if len(sys.argv) > 2: + service_name = sys.argv[2] + + # Open the journal. + proc = Popen(["journalctl", "--user-unit", service_name, "--since", time_string], stdin=PIPE, stdout=PIPE, stderr=PIPE, text=True) + std_out, std_err = proc.communicate() + + if len(std_err) > 0: + await send_msg(client, "Error reading journalctl output: {}".format(std_err)) + return + + # If there are any critical errors. upload the whole log file. 
+ if "Critical" in std_out: + upload_name = "{}-{}-{}-{}-{}:{}:{}.log".format(service_name, time.year, time.month, time.day, time.hour, time.minute, time.second) + await send_msg(client, "Critical error found in log!", file=discord.File(io.BytesIO(std_out.encode()), filename=upload_name)) + return + + # No critical errors, return a heartbeat type message. + await send_msg(client, "No critical warnings in log (size of log portion checked: {})".format(len(std_out))) + + +client.run(bot_token) From 285d915602dcfc666dffb017820e63808d176466 Mon Sep 17 00:00:00 2001 From: Marcin Jachymiak Date: Mon, 9 Mar 2020 15:59:21 -0400 Subject: [PATCH 3/5] Rename health-checker to funds-checker --- setup-scripts/{health-checker.py => funds-checker.py} | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) rename setup-scripts/{health-checker.py => funds-checker.py} (98%) diff --git a/setup-scripts/health-checker.py b/setup-scripts/funds-checker.py similarity index 98% rename from setup-scripts/health-checker.py rename to setup-scripts/funds-checker.py index a9d80042..d0750a8c 100755 --- a/setup-scripts/health-checker.py +++ b/setup-scripts/funds-checker.py @@ -31,7 +31,7 @@ async def run_checks(): # allowance worth of money left, and if more than hald the allowance is spent. If # all checks pass it sends a informational message. 
async def check_health(): - print("\nChecking health...") + print("\nChecking wallet/funds health...") wallet_get = siad.get_wallet() renter_get = siad.get_renter() From 1349440251b94084e02ee876f659f3294401b814 Mon Sep 17 00:00:00 2001 From: Marcin Jachymiak Date: Mon, 9 Mar 2020 16:01:41 -0400 Subject: [PATCH 4/5] Update README and health check setup-scripts --- setup-scripts/README.md | 17 +++++++++++++++++ setup-scripts/setup-health-check-scripts.sh | 10 ++++++++-- 2 files changed, 25 insertions(+), 2 deletions(-) diff --git a/setup-scripts/README.md b/setup-scripts/README.md index f507cc34..c43f8dc0 100644 --- a/setup-scripts/README.md +++ b/setup-scripts/README.md @@ -115,3 +115,20 @@ You can check that with `node -v` and `yarn -v` commands respectively. - run `yarn build` to build the client package Client package will be outputted to `/public` and nginx configuration will pick it up automatically. + +## Health Check Scripts. + +There are 2 optional health check scripts that can be set up using +`setup-health-check-scripts.sh`. That command will install the necessary Python +dependencies and set up 2 cronjobs for each script: one for a downloading `siad` +service and one for an uploading `siad` service. + +To use the scripts you must set up a Discord bot and provide a bot token. The bot +scripts take in 1 or more arguments, the first always being the path to an +`.env` file. + +`funds-checker` checks that the wallet balance and allowance settings are +sufficient for portal usage. + +`log-checker` checks if there are any critical warnings in the journal for the +running services. 
diff --git a/setup-scripts/setup-health-check-scripts.sh b/setup-scripts/setup-health-check-scripts.sh index aae7063b..f347b16e 100755 --- a/setup-scripts/setup-health-check-scripts.sh +++ b/setup-scripts/setup-health-check-scripts.sh @@ -7,8 +7,14 @@ sudo apt-get -y install python3-pip pip3 install discord.py pip3 install python-dotenv -downloadCheck="0 0,8,16 * * * ~/skynet-webportal/setup-scripts/health-checker.py ~/.sia/sia.env" -uploadCheck="0 0,8,16 * * * ~/skynet-webportal/setup-scripts/health-checker.py ~/.sia/sia-upload.env" +downloadCheck="0 0,8,16 * * * ~/skynet-webportal/setup-scripts/funds-checker.py ~/.sia/sia.env" +uploadCheck="0 0,8,16 * * * ~/skynet-webportal/setup-scripts/funds-checker.py ~/.sia/sia-upload.env" + +logCheck1="0 * * * * ~/skynet-webportal/setup-scripts/log-checker.py ~/.sia/sia.env" +logCheck2="0 * * * * ~/skynet-webportal/setup-scripts/log-checker.py ~/.sia/sia-upload.env siad-upload" (crontab -u user -l; echo "$downloadCheck" ) | crontab -u user - (crontab -u user -l; echo "$uploadCheck" ) | crontab -u user - + +(crontab -u user -l; echo "$logCheck1" ) | crontab -u user - +(crontab -u user -l; echo "$logCheck2" ) | crontab -u user - From bc3e72726eb74203e9e97d5cdfc7051196870784 Mon Sep 17 00:00:00 2001 From: Marcin Jachymiak Date: Mon, 9 Mar 2020 16:06:15 -0400 Subject: [PATCH 5/5] Set force_notify in log check error cases --- setup-scripts/log-checker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup-scripts/log-checker.py b/setup-scripts/log-checker.py index c15f527a..15ee1bed 100755 --- a/setup-scripts/log-checker.py +++ b/setup-scripts/log-checker.py @@ -54,13 +54,13 @@ async def check_journal(): std_out, std_err = proc.communicate() if len(std_err) > 0: - await send_msg(client, "Error reading journalctl output: {}".format(std_err)) + await send_msg(client, "Error reading journalctl output: {}".format(std_err), force_notify=True) return # If there are any critical errors. upload the whole log file. 
if "Critical" in std_out: upload_name = "{}-{}-{}-{}-{}:{}:{}.log".format(service_name, time.year, time.month, time.day, time.hour, time.minute, time.second) - await send_msg(client, "Critical error found in log!", file=discord.File(io.BytesIO(std_out.encode()), filename=upload_name)) + await send_msg(client, "Critical error found in log!", file=discord.File(io.BytesIO(std_out.encode()), filename=upload_name), force_notify=True) return # No critical errors, return a heartbeat type message.