diff --git a/changelog/items/other/dump-disk-space-usage.md b/changelog/items/other/dump-disk-space-usage.md
new file mode 100644
index 00000000..3c1c3147
--- /dev/null
+++ b/changelog/items/other/dump-disk-space-usage.md
@@ -0,0 +1,2 @@
+- Dump disk space usage when health-checker script disables portal due to
+  critical free disk space.
\ No newline at end of file
diff --git a/setup-scripts/disk-usage-dump.sh b/setup-scripts/disk-usage-dump.sh
new file mode 100755
index 00000000..9326d459
--- /dev/null
+++ b/setup-scripts/disk-usage-dump.sh
@@ -0,0 +1,70 @@
+#!/bin/bash
+
+# Dumps disk usage to stdout or to a file.
+#
+# Parameters:
+# - $1 (optional): Filename to append the output to.
+#
+# Usage:
+# - Dump disk usage to stdout:
+#   ./disk-usage-dump.sh
+#
+# - Dump disk usage appending to the file:
+#   ./disk-usage-dump.sh my-log-file.log
+#
+# Use docker container to get root (script can be run under regular user, no
+# need for sudo)
+
+# Runs `du` with the given arguments as root inside a throwaway container.
+# --rm removes the container on exit, so repeated dumps do not pile up stopped
+# containers on a disk that is already low on space.
+du_as_root () {
+  docker run --rm -v /home/user:/home/user alpine:3.15.0 du "$@"
+}
+
+# Prints a timestamped report: free space, per-directory usage of the home and
+# docker data dirs, and the largest entries under /home/user.
+dump () {
+  echo
+  echo "### Disk usage dump at $(date) ###"
+
+  # Free disk space
+  echo
+  df -h /home/user
+
+  # Home dirs
+  echo
+  echo "Home dirs:"
+  du_as_root -hs /home/user/*
+
+  # Docker data dirs
+  echo
+  echo "Docker data dirs:"
+  du_as_root -hs /home/user/skynet-webportal/docker/data/*
+
+  # Largest dirs/files. Match T as well as G sizes and sort by human-readable
+  # size so that terabyte-sized entries are listed (and listed first).
+  echo
+  echo "Dirs or files over 1GB (first 100):"
+  du_as_root -h /home/user | grep -E "^[0-9]+\.?[0-9]*[GT]" | sort -r -h | head -100
+}
+
+# Check argument is present
+if [ -z "$1" ]; then
+  # Dump to stdout
+  dump
+else
+  # Handle log paths. realpath -m resolves the path even when the log
+  # directory does not exist yet; it is created just below.
+  filename=$(basename "$1")
+  dirname=$(dirname "$1")
+  abs_dirname=$(realpath -m "$dirname")
+
+  # Make sure log dir exists
+  mkdir -p "$abs_dirname"
+
+  # Append to file
+  {
+    dump
+  } >> "$abs_dirname/$filename" 2>&1
+fi
diff --git a/setup-scripts/health-checker.py b/setup-scripts/health-checker.py
index a2bbbcea..c5023bf8 100755
--- 
a/setup-scripts/health-checker.py +++ b/setup-scripts/health-checker.py @@ -37,6 +37,9 @@ GB = 1 << 30 # 1 GiB in bytes FREE_DISK_SPACE_THRESHOLD = 100 * GB FREE_DISK_SPACE_THRESHOLD_CRITICAL = 60 * GB +# Disk usage dump log file (relative to this .py script). +DISK_USAGE_DUMP_LOG = "../../devops/disk-monitor/disk-usage-dump.log" + setup() @@ -103,6 +106,11 @@ async def check_disk(): message = "CRITICAL! Very low disk space: {}GiB, **siad stopped**!".format( free_space_gb ) + + # dump disk usage + script_dir = os.path.dirname(os.path.realpath(sys.argv[0])) + os.popen(script_dir + "/disk-usage-dump.sh " + script_dir + "/" + DISK_USAGE_DUMP_LOG) + inspect = os.popen("docker inspect sia").read().strip() inspect_json = json.loads(inspect) if inspect_json[0]["State"]["Running"] is True: