From efb35054f7b824b604d78daf4bca1a0bb9ac7dcb Mon Sep 17 00:00:00 2001 From: Ivaylo Novakov Date: Fri, 28 Aug 2020 11:54:47 +0200 Subject: [PATCH 1/3] When checking logs, check also the system load and report it if it's above 10. --- setup-scripts/log-checker.py | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) diff --git a/setup-scripts/log-checker.py b/setup-scripts/log-checker.py index bfb53895..d30e7be4 100755 --- a/setup-scripts/log-checker.py +++ b/setup-scripts/log-checker.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -import discord, sys, traceback, io, os, asyncio +import discord, sys, traceback, io, os, asyncio, re from bot_utils import setup, send_msg from datetime import datetime, timedelta from subprocess import Popen, PIPE @@ -25,6 +25,7 @@ bot_token = setup() client = discord.Client() +# exit_after kills the script if it hasn't exited on its own after `delay` seconds async def exit_after(delay): await asyncio.sleep(delay) exit(0) @@ -40,6 +41,7 @@ async def on_ready(): async def run_checks(): print("Running Skynet portal log checks") try: + await check_load_average() await check_docker_logs() except: # catch all exceptions @@ -47,6 +49,19 @@ async def run_checks(): await send_msg(client, "```\n{}\n```".format(trace), force_notify=False) +# check_load_average monitors the system's load average value and issues a +# warning message if it exceeds 10. 
+async def check_load_average(): + uptime_string = os.popen("uptime").read().strip() + # pattern = "" + if sys.platform == "darwin": + pattern = "^.*load averages: (\d*\.\d*) \d*\.\d* \d*\.\d*$" + else: + pattern = "^.*load average: (\d*\.\d*), \d*\.\d*, \d*\.\d*$" + load_av = re.match(pattern, uptime_string).group(1) + if float(load_av) > 10: + await send_msg(client, "High system load detected: `{}`".format(uptime_string), force_notify=True) + # check_docker_logs checks the docker logs by filtering on the docker image name async def check_docker_logs(): print("\nChecking docker logs...") From 58409b8ceee480f7a0d6624f890baa1d56965f49 Mon Sep 17 00:00:00 2001 From: Ivaylo Novakov Date: Fri, 28 Aug 2020 11:59:27 +0200 Subject: [PATCH 2/3] Switch from the "1 minute" to "15 minutes" average. --- setup-scripts/log-checker.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup-scripts/log-checker.py b/setup-scripts/log-checker.py index d30e7be4..a020c3d6 100755 --- a/setup-scripts/log-checker.py +++ b/setup-scripts/log-checker.py @@ -55,9 +55,9 @@ async def check_load_average(): uptime_string = os.popen("uptime").read().strip() # pattern = "" if sys.platform == "darwin": - pattern = "^.*load averages: (\d*\.\d*) \d*\.\d* \d*\.\d*$" + pattern = "^.*load averages: \d*\.\d* \d*\.\d* (\d*\.\d*)$" else: - pattern = "^.*load average: (\d*\.\d*), \d*\.\d*, \d*\.\d*$" + pattern = "^.*load average: \d*\.\d*, \d*\.\d*, (\d*\.\d*)$" load_av = re.match(pattern, uptime_string).group(1) if float(load_av) > 10: await send_msg(client, "High system load detected: `{}`".format(uptime_string), force_notify=True) From 59bad59cb7cc2d64eb59f659a02813a80b961cf0 Mon Sep 17 00:00:00 2001 From: Ivaylo Novakov Date: Fri, 28 Aug 2020 16:05:24 +0200 Subject: [PATCH 3/3] Make it clear that the uptime data comes from the `uptime` command and not something we're putting together ourselves. 
--- setup-scripts/log-checker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup-scripts/log-checker.py b/setup-scripts/log-checker.py index a020c3d6..2cd2c27c 100755 --- a/setup-scripts/log-checker.py +++ b/setup-scripts/log-checker.py @@ -60,7 +60,7 @@ async def check_load_average(): pattern = "^.*load average: \d*\.\d*, \d*\.\d*, (\d*\.\d*)$" load_av = re.match(pattern, uptime_string).group(1) if float(load_av) > 10: - await send_msg(client, "High system load detected: `{}`".format(uptime_string), force_notify=True) + await send_msg(client, "High system load detected: `uptime: {}`".format(uptime_string), force_notify=True) # check_docker_logs checks the docker logs by filtering on the docker image name async def check_docker_logs():