From 60f8371170cc1da9531595b9b2f0adb802159329 Mon Sep 17 00:00:00 2001 From: Karol Wypchlo Date: Tue, 6 Oct 2020 11:24:18 +0200 Subject: [PATCH 1/3] stop sia container on critical disk space threshold --- setup-scripts/health-checker.py | 23 ++++++++++++++++++++--- 1 file changed, 20 insertions(+), 3 deletions(-) diff --git a/setup-scripts/health-checker.py b/setup-scripts/health-checker.py index 3c9c18fd..0ee10129 100755 --- a/setup-scripts/health-checker.py +++ b/setup-scripts/health-checker.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -import asyncio, json, os, re, sys, traceback, discord, requests +import asyncio, json, os, re, sys, traceback, discord, requests, time, subprocess from datetime import datetime, timedelta from bot_utils import setup, send_msg @@ -19,9 +19,10 @@ if len(sys.argv) > 3: DISCORD_MAX_MESSAGE_LENGTH = 1900 GB = 1 << 30 # 1 GiB in bytes -# We are going to issue Discord warnings if the free space on a server falls -# under this threshold. + +# Free disk space threshold used for notices and shutting down siad. FREE_DISK_SPACE_THRESHOLD = 50 * GB +FREE_DISK_SPACE_THRESHOLD_CRITICAL = 20 * GB bot_token = setup() client = discord.Client() @@ -92,6 +93,22 @@ async def check_disk(): if vol == "": message = "Failed to check free disk space! Didn't find a suitable mount point to check." return await send_msg(client, message, file=df) + + # if we've reached a critical free disk space threshold we need to send proper notice + # and shut down sia container so it doesn't get corrupted + if int(volumes[vol]) < FREE_DISK_SPACE_THRESHOLD_CRITICAL: + free_space_gb = "{:.2f}".format(int(volumes[vol]) / GB) + message = "CRITICAL! Very low disk space: {}GiB".format(free_space_gb) + inspect = os.popen("docker inspect sia").read().strip() + inspect_json = json.loads(inspect) + if inspect_json[0]["State"]["Running"] == True: + message += ", **stopping siad container**!" 
+ subprocess.Popen('/home/user/skynet-webportal/scripts/portal-down.sh') + else: + message += ", siad container is already stopped!" + return await send_msg(client, message, force_notify=True) + + # if we've reached a free disk space threshold we need to send proper notice if int(volumes[vol]) < FREE_DISK_SPACE_THRESHOLD: free_space_gb = "{:.2f}".format(int(volumes[vol]) / GB) message = "WARNING! Low disk space: {}GiB".format(free_space_gb) From 9b6d61aa7e05004a1da9c93338aaa5ed1222a48d Mon Sep 17 00:00:00 2001 From: Karol Wypchlo Date: Tue, 6 Oct 2020 11:27:06 +0200 Subject: [PATCH 2/3] remove unnecessary time dependency --- setup-scripts/health-checker.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup-scripts/health-checker.py b/setup-scripts/health-checker.py index 0ee10129..726f4193 100755 --- a/setup-scripts/health-checker.py +++ b/setup-scripts/health-checker.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -import asyncio, json, os, re, sys, traceback, discord, requests, time, subprocess +import asyncio, json, os, re, sys, traceback, discord, requests, subprocess from datetime import datetime, timedelta from bot_utils import setup, send_msg From 1922c4cd9872a77764e7e54d3a5b90dda7af815e Mon Sep 17 00:00:00 2001 From: Karol Wypchlo Date: Tue, 6 Oct 2020 12:12:19 +0200 Subject: [PATCH 3/3] use os.popen manually --- setup-scripts/health-checker.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/setup-scripts/health-checker.py b/setup-scripts/health-checker.py index 726f4193..b36b45fb 100755 --- a/setup-scripts/health-checker.py +++ b/setup-scripts/health-checker.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 -import asyncio, json, os, re, sys, traceback, discord, requests, subprocess +import asyncio, json, os, re, sys, traceback, discord, requests, time from datetime import datetime, timedelta from bot_utils import setup, send_msg @@ -98,14 +98,13 @@ async def check_disk(): # and shut down sia container so it doesn't 
get corrupted if int(volumes[vol]) < FREE_DISK_SPACE_THRESHOLD_CRITICAL: free_space_gb = "{:.2f}".format(int(volumes[vol]) / GB) - message = "CRITICAL! Very low disk space: {}GiB".format(free_space_gb) + message = "CRITICAL! Very low disk space: {}GiB, **siad stopped**!".format(free_space_gb) inspect = os.popen("docker inspect sia").read().strip() inspect_json = json.loads(inspect) if inspect_json[0]["State"]["Running"] == True: - message += ", **stopping siad container**!" - subprocess.Popen('/home/user/skynet-webportal/scripts/portal-down.sh') - else: - message += ", siad container is already stopped!" + os.popen("docker exec health-check cli/disable") # mark portal as unhealthy + time.sleep(300) # wait 5 minutes to propagate dns changes + os.popen("docker stop sia") # stop sia container return await send_msg(client, message, force_notify=True) # if we've reached a free disk space threshold we need to send proper notice