Merge branch 'master' into sevey/disabl-load-check
This commit is contained in:
commit
3d11ca503f
|
@ -0,0 +1,2 @@
|
||||||
|
- Dump disk space usage when health-checker script disables portal due to
|
||||||
|
critical free disk space.
|
|
@ -0,0 +1 @@
|
||||||
|
- Set `min_free` parameter on the `proxy_cache_path` directive to `100g`
|
|
@ -70,7 +70,7 @@ http {
|
||||||
proxy_http_version 1.1;
|
proxy_http_version 1.1;
|
||||||
|
|
||||||
# proxy cache definition
|
# proxy cache definition
|
||||||
proxy_cache_path /data/nginx/cache levels=1:2 keys_zone=skynet:10m max_size=50g inactive=48h use_temp_path=off;
|
proxy_cache_path /data/nginx/cache levels=1:2 keys_zone=skynet:10m max_size=50g min_free=100g inactive=48h use_temp_path=off;
|
||||||
|
|
||||||
# this runs before forking out nginx worker processes
|
# this runs before forking out nginx worker processes
|
||||||
init_by_lua_block {
|
init_by_lua_block {
|
||||||
|
|
Binary file not shown.
|
@ -0,0 +1,59 @@
|
||||||
|
#!/bin/bash
|
||||||
|
|
||||||
|
# Dumps disk usage to stdout or to the file
|
||||||
|
#
|
||||||
|
# Parameters:
|
||||||
|
# - $1 (optional): Filename to append the output to.
|
||||||
|
#
|
||||||
|
# Usage:
|
||||||
|
# - Dump disk usage to stdout:
|
||||||
|
# ./disk-usage-dump.sh
|
||||||
|
#
|
||||||
|
# - Dump disk usage appending to the file:
|
||||||
|
# ./disk-usage-dump.sh my-log-file.log
|
||||||
|
#
|
||||||
|
# Use docker container to get root (script can be run under regular user, no
|
||||||
|
# need for sudo)
|
||||||
|
|
||||||
|
# Print a timestamped disk usage report to stdout: free space for /home/user,
# per-entry sizes of the home dir and of the docker data dir, and the
# directories/files over 1GB.
#
# The `du` calls run inside a docker container so they execute as root even
# when this script is started by a regular user (no sudo needed). Every
# container is started with --rm so it is removed as soon as `du` exits;
# without it each dump would leave a stopped container behind.
#
# Note: the `*` globs below are unquoted, so they are expanded by this shell on
# the host before docker is invoked. This works because /home/user is mounted
# at the same path inside the container.
dump () {
  echo
  echo "### Disk usage dump at $(date) ###"

  # Free disk space
  echo
  df -h /home/user

  # Home dirs
  echo
  echo "Home dirs:"
  docker run --rm -v /home/user:/home/user alpine:3.15.0 du -hs /home/user/*

  # Docker data dirs
  echo
  echo "Docker data dirs:"
  docker run --rm -v /home/user:/home/user alpine:3.15.0 du -hs /home/user/skynet-webportal/docker/data/*

  # Largest dirs/files: keep only entries whose human-readable size is in
  # gigabytes, largest first, capped at 100 lines
  echo
  echo "Dirs or files over 1GB (first 100):"
  docker run --rm -v /home/user:/home/user alpine:3.15.0 du -h /home/user | grep -E "^[0-9]+\.?[0-9]*G" | sort -r -n | head -100
}
|
||||||
|
|
||||||
|
# Check argument is present
if [ -z "$1" ]; then
  # Dump to stdout
  dump
else
  # Handle log paths
  filename=$(basename "$1")
  dirname=$(dirname "$1")

  # Make sure log dir exists BEFORE resolving it: realpath fails when
  # intermediate path components are missing, which would leave the absolute
  # dir empty and send the output to "/<filename>".
  mkdir -p "$dirname"
  abs_dirname=$(realpath "$dirname")

  # Append to file (stdout and stderr of the whole dump)
  {
    dump
  } >> "$abs_dirname/$filename" 2>&1
fi
|
|
@ -37,6 +37,9 @@ GB = 1 << 30 # 1 GiB in bytes
|
||||||
FREE_DISK_SPACE_THRESHOLD = 100 * GB
|
FREE_DISK_SPACE_THRESHOLD = 100 * GB
|
||||||
FREE_DISK_SPACE_THRESHOLD_CRITICAL = 60 * GB
|
FREE_DISK_SPACE_THRESHOLD_CRITICAL = 60 * GB
|
||||||
|
|
||||||
|
# Disk usage dump log file (relative to this .py script).
|
||||||
|
DISK_USAGE_DUMP_LOG = "../../devops/disk-monitor/disk-usage-dump.log"
|
||||||
|
|
||||||
setup()
|
setup()
|
||||||
|
|
||||||
|
|
||||||
|
@ -105,11 +108,16 @@ async def check_disk():
|
||||||
message = "CRITICAL! Very low disk space: {}GiB, **siad stopped**!".format(
|
message = "CRITICAL! Very low disk space: {}GiB, **siad stopped**!".format(
|
||||||
free_space_gb
|
free_space_gb
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# dump disk usage
|
||||||
|
script_dir = os.path.dirname(os.path.realpath(sys.argv[0]))
|
||||||
|
os.popen(script_dir + "/disk-usage-dump.sh " + script_dir + "/" + DISK_USAGE_DUMP_LOG)
|
||||||
|
|
||||||
inspect = os.popen("docker inspect sia").read().strip()
|
inspect = os.popen("docker inspect sia").read().strip()
|
||||||
inspect_json = json.loads(inspect)
|
inspect_json = json.loads(inspect)
|
||||||
if inspect_json[0]["State"]["Running"] is True:
|
if inspect_json[0]["State"]["Running"] is True:
|
||||||
# mark portal as unhealthy
|
# mark portal as unhealthy
|
||||||
os.popen("docker exec health-check cli/disable")
|
os.popen("docker exec health-check cli disable 'critical free disk space'")
|
||||||
time.sleep(300) # wait 5 minutes to propagate dns changes
|
time.sleep(300) # wait 5 minutes to propagate dns changes
|
||||||
os.popen("docker stop sia") # stop sia container
|
os.popen("docker stop sia") # stop sia container
|
||||||
return await send_msg(message, force_notify=True)
|
return await send_msg(message, force_notify=True)
|
||||||
|
|
Reference in New Issue