Merge pull request #385 from NebulousLabs/ivo/gollum_time_zones
Make the time comparisons in the health checker timezone-aware.
This commit is contained in:
commit
a4dd527a30
|
@ -10,8 +10,10 @@ import traceback
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
|
|
||||||
import discord
|
import discord
|
||||||
|
import pytz.reference
|
||||||
import requests
|
import requests
|
||||||
from bot_utils import setup, send_msg
|
from bot_utils import setup, send_msg
|
||||||
|
from tzlocal import get_localzone
|
||||||
|
|
||||||
"""
|
"""
|
||||||
health-checker reads the /health-check endpoint of the portal and dispatches
|
health-checker reads the /health-check endpoint of the portal and dispatches
|
||||||
|
@ -27,11 +29,10 @@ if len(sys.argv) > 3:
|
||||||
# a lower limit in order to leave some space for additional message text.
|
# a lower limit in order to leave some space for additional message text.
|
||||||
DISCORD_MAX_MESSAGE_LENGTH = 1900
|
DISCORD_MAX_MESSAGE_LENGTH = 1900
|
||||||
|
|
||||||
GB = 1 << 20 # converts from KiB to GiB
|
GB = 1 << 30 # 1 GiB in bytes
|
||||||
# We are going to issue Discord warnings if the free space on a server falls
|
# We are going to issue Discord warnings if the free space on a server falls
|
||||||
# under this threshold.
|
# under this threshold.
|
||||||
FREE_DISK_SPACE_THRESHOLD = 50 * GB # 50 GiB
|
FREE_DISK_SPACE_THRESHOLD = 50 * GB
|
||||||
|
|
||||||
|
|
||||||
bot_token = setup()
|
bot_token = setup()
|
||||||
client = discord.Client()
|
client = discord.Client()
|
||||||
|
@ -87,8 +88,8 @@ async def check_disk():
|
||||||
volumes = {}
|
volumes = {}
|
||||||
for line in df.split("\n")[1:]:
|
for line in df.split("\n")[1:]:
|
||||||
fields = list(filter(None, line.split(" ")))
|
fields = list(filter(None, line.split(" ")))
|
||||||
# -1 is "mounted on", 3 is "available space"
|
# -1 is "mounted on", 3 is "available space" in KiB which we want in bytes
|
||||||
volumes[fields[-1]] = fields[3]
|
volumes[fields[-1]] = fields[3] * 1024
|
||||||
# List of mount point, longest to shortest. We'll use that to find the best
|
# List of mount point, longest to shortest. We'll use that to find the best
|
||||||
# fit for the volume we want to check.
|
# fit for the volume we want to check.
|
||||||
mount_points = sorted(volumes.keys(), key=len, reverse=True)
|
mount_points = sorted(volumes.keys(), key=len, reverse=True)
|
||||||
|
@ -126,13 +127,15 @@ async def check_health():
|
||||||
return
|
return
|
||||||
|
|
||||||
# Check the health records.
|
# Check the health records.
|
||||||
failed_records = []
|
passed_checks = 0
|
||||||
failed_checks = 0
|
failed_checks = 0
|
||||||
failed_critical = 0
|
failed_critical = 0
|
||||||
passed_checks_counter = 0
|
failed_records = []
|
||||||
time_limit = datetime.now() - timedelta(hours=CHECK_HOURS)
|
time_limit_unaware = datetime.now() - timedelta(hours=CHECK_HOURS) # local time
|
||||||
|
time_limit = time_limit_unaware.astimezone(get_localzone()) # time with time zone
|
||||||
for rec in res.json():
|
for rec in res.json():
|
||||||
time = datetime.strptime(rec['date'], '%Y-%m-%dT%H:%M:%S.%fZ')
|
time_unaware = datetime.strptime(rec['date'], '%Y-%m-%dT%H:%M:%S.%fZ') # time in UTC
|
||||||
|
time = pytz.utc.localize(time_unaware) # time with time zone
|
||||||
if time < time_limit:
|
if time < time_limit:
|
||||||
continue
|
continue
|
||||||
bad = False
|
bad = False
|
||||||
|
@ -145,18 +148,21 @@ async def check_health():
|
||||||
if bad:
|
if bad:
|
||||||
# We append the entire record, so we can get the full context.
|
# We append the entire record, so we can get the full context.
|
||||||
failed_records.append(rec)
|
failed_records.append(rec)
|
||||||
passed_checks_counter += 1
|
passed_checks += 1
|
||||||
|
|
||||||
|
checks = passed_checks + failed_checks
|
||||||
if len(failed_records) > 0:
|
if len(failed_records) > 0:
|
||||||
message = "Found {} failed checks ({} critical) over the last {} hours!".format(failed_checks, failed_critical,
|
message = "Found {}/{} failed checks ({} critical) over the last {} hours!".format(failed_checks, checks,
|
||||||
CHECK_HOURS)
|
failed_critical, CHECK_HOURS)
|
||||||
file = discord.File(io.BytesIO(json.dumps(failed_records, indent=2).encode()), filename="failed_checks.log")
|
file = discord.File(io.BytesIO(json.dumps(failed_records, indent=2).encode()), filename="failed_checks.log")
|
||||||
notifyTeam = failed_critical > 0
|
notifyTeam = failed_critical > 0
|
||||||
await send_msg(client, message, file=file, force_notify=notifyTeam)
|
await send_msg(client, message, file=file, force_notify=notifyTeam)
|
||||||
return
|
return
|
||||||
|
|
||||||
# Send an informational heartbeat if all checks passed.
|
# Send an informational heartbeat if all checks passed but only if it's in
|
||||||
await send_msg(client, "Health checks passed: {}\n".format(passed_checks_counter))
|
# the first CHECK_HOURS hours of the day, essentially the first call.
|
||||||
|
if datetime.now().hour < CHECK_HOURS:
|
||||||
|
await send_msg(client, "Health checks passed: {}/{}\n".format(passed_checks, checks))
|
||||||
|
|
||||||
|
|
||||||
client.run(bot_token)
|
client.run(bot_token)
|
||||||
|
|
|
@ -5,7 +5,7 @@ set -e # exit on first error
|
||||||
sudo apt-get update
|
sudo apt-get update
|
||||||
sudo apt-get -y install python3-pip
|
sudo apt-get -y install python3-pip
|
||||||
|
|
||||||
pip3 install discord.py python-dotenv requests
|
pip3 install discord.py python-dotenv requests pytz tzlocal
|
||||||
|
|
||||||
fundsCheck="0 0,8,16 * * * /home/user/skynet-webportal/setup-scripts/funds-checker.py /home/user/skynet-webportal/.env"
|
fundsCheck="0 0,8,16 * * * /home/user/skynet-webportal/setup-scripts/funds-checker.py /home/user/skynet-webportal/.env"
|
||||||
logsCheck="0 0,8,16 * * * /home/user/skynet-webportal/setup-scripts/log-checker.py /home/user/skynet-webportal/.env sia 8"
|
logsCheck="0 0,8,16 * * * /home/user/skynet-webportal/setup-scripts/log-checker.py /home/user/skynet-webportal/.env sia 8"
|
||||||
|
|
Reference in New Issue