Merge pull request #1407 from SkynetLabs/fil/health-checker-disk-space-dump

Dump disk usage on health-checker critical disk space
This commit is contained in:
Christopher Schinnerl 2021-12-01 15:29:35 +01:00 committed by GitHub
commit 6dec2be02a
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 69 additions and 0 deletions

View File

@ -0,0 +1,2 @@
- Dump disk space usage when health-checker script disables portal due to
critical free disk space.

View File

@ -0,0 +1,59 @@
#!/bin/bash
# Dumps disk usage to stdout, or appends it to a file when a filename is given
#
# Parameters:
# - $1 (optional): Filename to append the output to.
#
# Usage:
# - Dump disk usage to stdout:
# ./disk-usage-dump.sh
#
# - Dump disk usage, appending to the file:
# ./disk-usage-dump.sh my-log-file.log
#
# Use docker container to get root (script can be run under regular user, no
# need for sudo)
# Print a disk usage report for /home/user to stdout.
#
# The report contains, in order: a timestamped header, free space on the
# filesystem holding /home/user, per-entry sizes of /home/user, per-entry sizes
# of the portal's docker data dir, and up to 100 dirs/files of 1GB or more.
#
# The `du` calls run inside a short-lived alpine container with /home/user
# bind-mounted at the same path. Every container is started with `--rm` so it
# is removed as soon as it exits; without it each dump would leave three
# stopped containers behind on a host that is already low on disk space.
dump () {
  echo
  echo "### Disk usage dump at $(date) ###"

  # Free disk space
  echo
  df -h /home/user

  # Home dirs
  # NOTE: the glob is expanded by the host shell before docker starts; this
  # works because the mount uses the same path inside the container.
  echo
  echo "Home dirs:"
  docker run --rm -v /home/user:/home/user alpine:3.15.0 du -hs /home/user/*

  # Docker data dirs
  echo
  echo "Docker data dirs:"
  docker run --rm -v /home/user:/home/user alpine:3.15.0 du -hs /home/user/skynet-webportal/docker/data/*

  # Largest dirs/files
  # Keep only lines whose human-readable size is in gigabytes, then order them
  # by the leading number, largest first.
  echo
  echo "Dirs or files over 1GB (first 100):"
  docker run --rm -v /home/user:/home/user alpine:3.15.0 du -h /home/user | grep -E "^[0-9]+\.?[0-9]*G" | sort -r -n | head -100
}
# Entry point: with no argument the dump goes to stdout, otherwise it is
# appended (stdout and stderr) to the file named by $1.
if [ -z "$1" ]; then
  # Dump to stdout
  dump
else
  # Split the log path into its directory and file name
  filename=$(basename "$1")
  dirname=$(dirname "$1")

  # Make sure the log dir exists BEFORE resolving it. realpath fails when an
  # intermediate path component is missing, which would leave abs_dirname
  # empty and redirect the dump to "/$filename".
  mkdir -p "$dirname"
  abs_dirname=$(realpath "$dirname")

  # Append to file
  {
    dump
  } >> "$abs_dirname/$filename" 2>&1
fi

View File

@ -37,6 +37,9 @@ GB = 1 << 30 # 1 GiB in bytes
FREE_DISK_SPACE_THRESHOLD = 100 * GB FREE_DISK_SPACE_THRESHOLD = 100 * GB
FREE_DISK_SPACE_THRESHOLD_CRITICAL = 60 * GB FREE_DISK_SPACE_THRESHOLD_CRITICAL = 60 * GB
# Disk usage dump log file (relative to this .py script).
DISK_USAGE_DUMP_LOG = "../../devops/disk-monitor/disk-usage-dump.log"
setup() setup()
@ -103,6 +106,11 @@ async def check_disk():
message = "CRITICAL! Very low disk space: {}GiB, **siad stopped**!".format( message = "CRITICAL! Very low disk space: {}GiB, **siad stopped**!".format(
free_space_gb free_space_gb
) )
# dump disk usage
script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
os.popen(script_dir + "/disk-usage-dump.sh " + script_dir + "/" + DISK_USAGE_DUMP_LOG)
inspect = os.popen("docker inspect sia").read().strip() inspect = os.popen("docker inspect sia").read().strip()
inspect_json = json.loads(inspect) inspect_json = json.loads(inspect)
if inspect_json[0]["State"]["Running"] is True: if inspect_json[0]["State"]["Running"] is True: