From 00fff39bf21502bf9e5ae1000adc8a6298614eec Mon Sep 17 00:00:00 2001 From: Ivaylo Novakov Date: Tue, 18 Aug 2020 17:27:01 +0300 Subject: [PATCH 1/9] Add a new log checker for docker. --- setup-scripts/log-checker.py | 60 +++++++++++++++++---- setup-scripts/setup-health-check-scripts.sh | 2 +- 2 files changed, 51 insertions(+), 11 deletions(-) diff --git a/setup-scripts/log-checker.py b/setup-scripts/log-checker.py index e8d9b5e3..ff8cf03a 100755 --- a/setup-scripts/log-checker.py +++ b/setup-scripts/log-checker.py @@ -1,26 +1,25 @@ #!/usr/bin/env python3 -import discord, sys, traceback, io +import discord, sys, traceback, io, os from bot_utils import setup, send_msg, sc_precision - from datetime import datetime, timedelta from subprocess import Popen, PIPE """ -log-checker checks journal logs for siad. +log-checker checks the docker logs for siad. Arguments: 1. path to a .env file (default is none so env variables can already be preset) - 2. systemd service name (default: "siad") + 2. docker container name name (default: "sia") - 3. number of hours to look back in log (used as --since value in journalctl - command) (default: 1 hour) + 3. number of hours to look back in log (default: 1 hour) """ -DEFAULT_CHECK_INTERVAL = timedelta(hours=1) +# The default check interval in hours. +DEFAULT_CHECK_INTERVAL = 1 bot_token = setup() client = discord.Client() @@ -34,7 +33,7 @@ async def on_ready(): async def run_checks(): print("Running Skynet portal log checks") try: - await check_journal() + await check_docker_logs() except: # catch all exceptions trace = traceback.format_exc() @@ -51,7 +50,7 @@ async def check_journal(): service_name = sys.argv[2] # Get the systemd service name as an argument, or use "siad" as default. 
- check_interval = DEFAULT_CHECK_INTERVAL + check_interval = timedelta(hours=DEFAULT_CHECK_INTERVAL) if len(sys.argv) > 3: check_interval = timedelta(hours=int(sys.argv[3])) @@ -73,10 +72,51 @@ async def check_journal(): await send_msg(client, "Critical error found in log!", file=discord.File(io.BytesIO(std_out.encode()), filename=upload_name), force_notify=True) return - # No critical errors, return a heartbeat type messagej + # No critical errors, return a heartbeat type message pretty_before = time.strftime("%I:%M%p") pretty_now = now.strftime("%I:%M%p") await send_msg(client, "No critical warnings in log from `{}` to `{}`".format(pretty_before, pretty_now)) +# check_docker_logs checks the docker logs by filtering on the docker image name +async def check_docker_logs(): + print("\nChecking docker logs...") + + # Get the container name as an argument or use "sia" as default. + container_name = "sia" + if len(sys.argv) > 2: + container_name = sys.argv[2] + + # Get the container id for siad. + stream = os.popen('docker ps -q --filter name=^{}$'.format(container_name)) + image_id = stream.read().strip() + + # Get the number of hours to look back in the logs or use 1 as default. + check_hours = DEFAULT_CHECK_INTERVAL + if len(sys.argv) > 3: + check_hours = int(sys.argv[3]) + + now = datetime.now() + time = now - timedelta(hours=check_hours) + time_string = "{}h".format(check_hours) + + # Read the logs. + proc = Popen(["docker", "logs", "--since", time_string, image_id], stdin=PIPE, stdout=PIPE, stderr=PIPE, text=True) + std_out, std_err = proc.communicate() + + if len(std_err) > 0: + await send_msg(client, "Error reading docker logs output: {}".format(std_err), force_notify=True) + return + + # If there are any critical errors. upload the whole log file. 
+ if "Critical" in std_out or "panic" in std_out: + upload_name = "{}-{}-{}-{}-{}:{}:{}.log".format(container_name, time.year, time.month, time.day, time.hour, time.minute, time.second) + await send_msg(client, "Critical error found in log!", file=discord.File(io.BytesIO(std_out.encode()), filename=upload_name), force_notify=True) + return + + # No critical errors, return a heartbeat type message + pretty_before = time.strftime("%I:%M%p") + pretty_now = now.strftime("%I:%M%p") + await send_msg(client, "No critical warnings in log from `{}` to `{}`".format(pretty_before, pretty_now)) + client.run(bot_token) diff --git a/setup-scripts/setup-health-check-scripts.sh b/setup-scripts/setup-health-check-scripts.sh index 81ec5376..6f56e8b7 100755 --- a/setup-scripts/setup-health-check-scripts.sh +++ b/setup-scripts/setup-health-check-scripts.sh @@ -9,7 +9,7 @@ pip3 install discord.py pip3 install python-dotenv fundsCheck="0 0,8,16 * * * /home/user/skynet-webportal/setup-scripts/funds-checker.py /home/user/.sia/sia.env" -logsCheck="0 0,8,16 * * * /home/user/skynet-webportal/setup-scripts/log-checker.py /home/user/.sia/sia.env siad 8" +logsCheck="0 0,8,16 * * * /home/user/skynet-webportal/setup-scripts/log-checker.py /home/user/.sia/sia.env sia 8" (crontab -u user -l; echo "$fundsCheck" ) | crontab -u user - (crontab -u user -l; echo "$logsCheck" ) | crontab -u user - From 4b7f80c3b4370cc67e7ce37bdbb1c29c406bc839 Mon Sep 17 00:00:00 2001 From: Ivaylo Novakov Date: Tue, 18 Aug 2020 17:38:07 +0300 Subject: [PATCH 2/9] Add a comment informing editors that changing the name of the `siad` container needs to be reflected in log-checker.py. 
--- docker-compose.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/docker-compose.yml b/docker-compose.yml index aad06d76..81cf2b65 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,6 +14,7 @@ services: dockerfile: Dockerfile args: branch: v1.5.0 + # Changing the container name will break the log-checker.py script, so please update that accordingly. container_name: sia restart: unless-stopped environment: From 221902508f3e0ecd43eded77413b683e3818a254 Mon Sep 17 00:00:00 2001 From: Ivaylo Novakov Date: Tue, 18 Aug 2020 18:25:54 +0300 Subject: [PATCH 3/9] Give the client 10 seconds to properly exit and if it fails to do so `exit` the hard way. --- setup-scripts/funds-checker.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/setup-scripts/funds-checker.py b/setup-scripts/funds-checker.py index 4608352d..2f840538 100755 --- a/setup-scripts/funds-checker.py +++ b/setup-scripts/funds-checker.py @@ -5,15 +5,21 @@ health-checker runs simple health checks on a portal node using the siad API and dispatches messages to a Discord channel. """ -import discord, traceback +import discord, traceback, asyncio from bot_utils import setup, send_msg, siad, sc_precision bot_token = setup() client = discord.Client() +async def exit_after(delay): + await asyncio.sleep(delay) + exit(0) + + @client.event async def on_ready(): await run_checks() + asyncio.create_task(exit_after(10)) await client.close() From 2a1c06116bed854482dbb73b19fa793e0dcaa8aa Mon Sep 17 00:00:00 2001 From: Ivaylo Novakov Date: Tue, 18 Aug 2020 18:36:00 +0300 Subject: [PATCH 4/9] Add a handy command to the readme that checks the logs in a dockerized environment. 
--- setup-scripts/README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup-scripts/README.md b/setup-scripts/README.md index 6738dec3..bb62bdc5 100644 --- a/setup-scripts/README.md +++ b/setup-scripts/README.md @@ -104,8 +104,10 @@ At this point we have almost everything running, we just need to set up your wal > `docker exec caddy caddy reload --config /etc/caddy/Caddyfile` - Restarting nginx gracefully after making changes to nginx configs > `docker exec nginx openresty -s reload` -- Checking siad service logs (follow last 50 lines) +- Checking siad service logs (follow last 50 lines) in non-dockerized environments > `journalctl -f -n 50 --user-unit siad` +- Checking siad service logs (last hour) in dockerized environments + > `docker logs --since 1h $(docker ps -q --filter "name=^sia$")` - Checking caddy logs (for example in case ssl certificate fails) > `docker logs caddy -f` - Checking nginx logs (nginx handles all communication to siad instances) From b05af57b541dfa0d80ec39e3e90a9a6635071e10 Mon Sep 17 00:00:00 2001 From: Ivaylo Novakov Date: Tue, 18 Aug 2020 18:37:49 +0300 Subject: [PATCH 5/9] Cleanup the old function. --- setup-scripts/log-checker.py | 38 ------------------------------------ 1 file changed, 38 deletions(-) diff --git a/setup-scripts/log-checker.py b/setup-scripts/log-checker.py index ff8cf03a..fdbcd250 100755 --- a/setup-scripts/log-checker.py +++ b/setup-scripts/log-checker.py @@ -40,44 +40,6 @@ async def run_checks(): await send_msg(client, "```\n{}\n```".format(trace), force_notify=False) -# check_journal checks the journal -async def check_journal(): - print("\nChecking journal...") - - # Get the systemd service name as an argument, or use "siad" as default. - service_name = "siad" - if len(sys.argv) > 2: - service_name = sys.argv[2] - - # Get the systemd service name as an argument, or use "siad" as default. 
- check_interval = timedelta(hours=DEFAULT_CHECK_INTERVAL) - if len(sys.argv) > 3: - check_interval = timedelta(hours=int(sys.argv[3])) - - now = datetime.now() - time = now - check_interval - time_string = "{}-{}-{} {}:{}:{}".format(time.year, time.month, time.day, time.hour, time.minute, time.second) - - # Open the journal. - proc = Popen(["journalctl", "--user-unit", service_name, "--since", time_string], stdin=PIPE, stdout=PIPE, stderr=PIPE, text=True) - std_out, std_err = proc.communicate() - - if len(std_err) > 0: - await send_msg(client, "Error reading journalctl output: {}".format(std_err), force_notify=True) - return - - # If there are any critical errors. upload the whole log file. - if "Critical" in std_out or "panic" in std_out: - upload_name = "{}-{}-{}-{}-{}:{}:{}.log".format(service_name, time.year, time.month, time.day, time.hour, time.minute, time.second) - await send_msg(client, "Critical error found in log!", file=discord.File(io.BytesIO(std_out.encode()), filename=upload_name), force_notify=True) - return - - # No critical errors, return a heartbeat type message - pretty_before = time.strftime("%I:%M%p") - pretty_now = now.strftime("%I:%M%p") - await send_msg(client, "No critical warnings in log from `{}` to `{}`".format(pretty_before, pretty_now)) - - # check_docker_logs checks the docker logs by filtering on the docker image name async def check_docker_logs(): print("\nChecking docker logs...") From 19fff428cd9e2d0df7fffe67e94b82b4a7f3d6f0 Mon Sep 17 00:00:00 2001 From: Ivaylo Novakov Date: Wed, 19 Aug 2020 10:17:26 +0300 Subject: [PATCH 6/9] Move code to the right place. 
--- setup-scripts/funds-checker.py | 6 +----- setup-scripts/log-checker.py | 9 ++++++++- 2 files changed, 9 insertions(+), 6 deletions(-) diff --git a/setup-scripts/funds-checker.py b/setup-scripts/funds-checker.py index 2f840538..767d885b 100755 --- a/setup-scripts/funds-checker.py +++ b/setup-scripts/funds-checker.py @@ -5,16 +5,12 @@ health-checker runs simple health checks on a portal node using the siad API and dispatches messages to a Discord channel. """ -import discord, traceback, asyncio +import discord, traceback from bot_utils import setup, send_msg, siad, sc_precision bot_token = setup() client = discord.Client() -async def exit_after(delay): - await asyncio.sleep(delay) - exit(0) - @client.event async def on_ready(): diff --git a/setup-scripts/log-checker.py b/setup-scripts/log-checker.py index fdbcd250..cce9a4ad 100755 --- a/setup-scripts/log-checker.py +++ b/setup-scripts/log-checker.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -import discord, sys, traceback, io, os +import discord, sys, traceback, io, os, asyncio from bot_utils import setup, send_msg, sc_precision from datetime import datetime, timedelta from subprocess import Popen, PIPE @@ -24,9 +24,16 @@ DEFAULT_CHECK_INTERVAL = 1 bot_token = setup() client = discord.Client() + +async def exit_after(delay): + await asyncio.sleep(delay) + exit(0) + + @client.event async def on_ready(): await run_checks() + asyncio.create_task(exit_after(10)) await client.close() From 81acebad5cb466eda9eebca1a747ee836c5c4286 Mon Sep 17 00:00:00 2001 From: Ivaylo Novakov Date: Wed, 19 Aug 2020 10:25:23 +0300 Subject: [PATCH 7/9] Add the forced exit to funds-checker.py as well. 
--- setup-scripts/funds-checker.py | 9 +++++++-- setup-scripts/log-checker.py | 4 ++-- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/setup-scripts/funds-checker.py b/setup-scripts/funds-checker.py index 767d885b..39c31dbb 100755 --- a/setup-scripts/funds-checker.py +++ b/setup-scripts/funds-checker.py @@ -5,17 +5,22 @@ health-checker runs simple health checks on a portal node using the siad API and dispatches messages to a Discord channel. """ -import discord, traceback +import discord, traceback, asyncio from bot_utils import setup, send_msg, siad, sc_precision bot_token = setup() client = discord.Client() +async def exit_after(delay): + await asyncio.sleep(delay) + exit(0) + + @client.event async def on_ready(): await run_checks() - asyncio.create_task(exit_after(10)) + asyncio.create_task(exit_after(30)) await client.close() diff --git a/setup-scripts/log-checker.py b/setup-scripts/log-checker.py index cce9a4ad..d4974048 100755 --- a/setup-scripts/log-checker.py +++ b/setup-scripts/log-checker.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 import discord, sys, traceback, io, os, asyncio -from bot_utils import setup, send_msg, sc_precision +from bot_utils import setup, send_msg from datetime import datetime, timedelta from subprocess import Popen, PIPE @@ -33,7 +33,7 @@ async def exit_after(delay): @client.event async def on_ready(): await run_checks() - asyncio.create_task(exit_after(10)) + asyncio.create_task(exit_after(30)) await client.close() From 341c65eba83c831eb5b97760a3c09157db9245d6 Mon Sep 17 00:00:00 2001 From: Ivaylo Novakov Date: Wed, 19 Aug 2020 16:13:56 +0300 Subject: [PATCH 8/9] Remove unneeded comment and example commands. 
--- docker-compose.yml | 1 - setup-scripts/README.md | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/docker-compose.yml b/docker-compose.yml index 81cf2b65..aad06d76 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -14,7 +14,6 @@ services: dockerfile: Dockerfile args: branch: v1.5.0 - # Changing the container name will break the log-checker.py script, so please update that accordingly. container_name: sia restart: unless-stopped environment: diff --git a/setup-scripts/README.md b/setup-scripts/README.md index bb62bdc5..9083dae7 100644 --- a/setup-scripts/README.md +++ b/setup-scripts/README.md @@ -104,9 +104,7 @@ At this point we have almost everything running, we just need to set up your wal > `docker exec caddy caddy reload --config /etc/caddy/Caddyfile` - Restarting nginx gracefully after making changes to nginx configs > `docker exec nginx openresty -s reload` -- Checking siad service logs (follow last 50 lines) in non-dockerized environments - > `journalctl -f -n 50 --user-unit siad` -- Checking siad service logs (last hour) in dockerized environments +- Checking siad service logs (last hour) > `docker logs --since 1h $(docker ps -q --filter "name=^sia$")` - Checking caddy logs (for example in case ssl certificate fails) > `docker logs caddy -f` From af49e65b040b8470f1471b75e3d5fa4ea0355c12 Mon Sep 17 00:00:00 2001 From: Ivaylo Novakov Date: Wed, 19 Aug 2020 19:14:55 +0300 Subject: [PATCH 9/9] Switch from `/home/user/.sia/sia.env` to `/home/user/skynet-webportal/.env`. 
--- setup-scripts/setup-docker-services.sh | 9 ++++++--- setup-scripts/setup-health-check-scripts.sh | 4 ++-- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/setup-scripts/setup-docker-services.sh b/setup-scripts/setup-docker-services.sh index 57d1cf0c..a7903c80 100755 --- a/setup-scripts/setup-docker-services.sh +++ b/setup-scripts/setup-docker-services.sh @@ -17,17 +17,20 @@ sudo curl -L "https://github.com/docker/compose/releases/download/1.25.5/docker- sudo chmod +x /usr/local/bin/docker-compose docker-compose --version # sanity check -# Create dummy .env file for docker-compose usage with veriables +# Create dummy .env file for docker-compose usage with variables # * DOMAIN_NAME - the domain name your server is using ie. example.com # * EMAIL_ADDRESS - this is the administrator contact email you need to supply for communication regarding SSL certification # * HSD_API_KEY - this is auto generated secure key for your handshake service integration # * CLOUDFLARE_AUTH_TOKEN` - (optional) if using cloudflare as dns loadbalancer (need to change it in Caddyfile too) # * AWS_ACCESS_KEY_ID - (optional) if using route53 as a dns loadbalancer # * AWS_SECRET_ACCESS_KEY - (optional) if using route53 as a dns loadbalancer +# * API_PORT - (optional) the port on which siad is listening, defaults to 9980 +# * PORTAL_NAME - the name of the portal, required by the discord bot +# * DISCORD_BOT_TOKEN - required by the discord bot if ! 
[ -f /home/user/skynet-webportal/.env ]; then HSD_API_KEY=$(openssl rand -base64 32) # generate safe random key for handshake - printf "DOMAIN_NAME=example.com\nEMAIL_ADDRESS=email@example.com\nSIA_WALLET_PASSWORD=\nHSD_API_KEY=${HSD_API_KEY}\nCLOUDFLARE_AUTH_TOKEN=\nAWS_ACCESS_KEY_ID=\nAWS_SECRET_ACCESS_KEY=\n" > /home/user/skynet-webportal/.env + printf "DOMAIN_NAME=example.com\nEMAIL_ADDRESS=email@example.com\nSIA_WALLET_PASSWORD=\nHSD_API_KEY=${HSD_API_KEY}\nCLOUDFLARE_AUTH_TOKEN=\nAWS_ACCESS_KEY_ID=\nAWS_SECRET_ACCESS_KEY=\nPORTAL_NAME=\nDISCORD_BOT_TOKEN=\n" > /home/user/skynet-webportal/.env fi # Start docker container with nginx and client -docker-compose -f docker-compose.yml up --build -d \ No newline at end of file +docker-compose -f docker-compose.yml up --build -d diff --git a/setup-scripts/setup-health-check-scripts.sh b/setup-scripts/setup-health-check-scripts.sh index 6f56e8b7..314a2170 100755 --- a/setup-scripts/setup-health-check-scripts.sh +++ b/setup-scripts/setup-health-check-scripts.sh @@ -8,8 +8,8 @@ sudo apt-get -y install python3-pip pip3 install discord.py pip3 install python-dotenv -fundsCheck="0 0,8,16 * * * /home/user/skynet-webportal/setup-scripts/funds-checker.py /home/user/.sia/sia.env" -logsCheck="0 0,8,16 * * * /home/user/skynet-webportal/setup-scripts/log-checker.py /home/user/.sia/sia.env sia 8" +fundsCheck="0 0,8,16 * * * /home/user/skynet-webportal/setup-scripts/funds-checker.py /home/user/skynet-webportal/.env" +logsCheck="0 0,8,16 * * * /home/user/skynet-webportal/setup-scripts/log-checker.py /home/user/skynet-webportal/.env sia 8" (crontab -u user -l; echo "$fundsCheck" ) | crontab -u user - (crontab -u user -l; echo "$logsCheck" ) | crontab -u user -