diff --git a/.gitignore b/.gitignore index f5cca07b..1687dfc7 100644 --- a/.gitignore +++ b/.gitignore @@ -85,3 +85,17 @@ docker/nginx/conf.d/server-override/* __pycache__ /.idea/ /venv* + +# CockroachDB certificates +docker/cockroach/certs/*.crt +docker/cockroach/certs/*.key +docker/kratos/cr_certs/*.crt +docker/kratos/cr_certs/*.key + +# Oathkeeper JWKS signing token +docker/kratos/oathkeeper/id_token.jwks.json +docker/kratos/config/kratos.yml + +# Setup-script log files +setup-scripts/serverload.log +setup-scripts/serverload.json diff --git a/changelog/items/key-updates/serverload.md b/changelog/items/key-updates/serverload.md new file mode 100644 index 00000000..c626b753 --- /dev/null +++ b/changelog/items/key-updates/serverload.md @@ -0,0 +1 @@ +- Add `/serverload` endpoint for CPU usage and free disk space diff --git a/changelog/items/other/nginx-prune.md b/changelog/items/other/nginx-prune.md new file mode 100644 index 00000000..42581090 --- /dev/null +++ b/changelog/items/other/nginx-prune.md @@ -0,0 +1 @@ +- Added script to prune nginx cache. \ No newline at end of file diff --git a/docker/nginx/conf.d/server/server.api b/docker/nginx/conf.d/server/server.api index 4402fc07..878569db 100644 --- a/docker/nginx/conf.d/server/server.api +++ b/docker/nginx/conf.d/server/server.api @@ -71,6 +71,21 @@ location /skynet/stats { proxy_pass http://sia:9980/skynet/stats; } +# Define path for server load endpoint +location /serverload { + # Define root directory in the nginx container to load file from + root /usr/local/share; + + # including this because of peer pressure from the other routes + include /etc/nginx/conf.d/include/cors; + + # tell nginx to expect json + default_type 'application/json'; + + # Allow for /serverload to load /serverload.json file + try_files $uri $uri.json =404; +} + location /skynet/health { include /etc/nginx/conf.d/include/cors; diff --git a/scripts/README.md b/scripts/README.md index e7b909b4..2085eff7 100644 --- a/scripts/README.md +++ b/scripts/README.md @@ -29,6 +29,12 @@ the health check. The `portal-upgrade.sh` script upgrades the docker images for a portal and clears and leftover images. +**nginx-prune.sh**\ +The `nginx-prune.sh` script deletes all entries from nginx cache larger than +the given size and smaller entries until nginx cache disk size is smaller than +the given cache size limit. Both values are configured in +`lib/nginx-prune-cache-subscript.sh`. The script doesn't require `sudo`. + ## Webportal Upgrade Procedures TODO... diff --git a/scripts/lib/nginx-prune-cache-subscript.sh b/scripts/lib/nginx-prune-cache-subscript.sh new file mode 100755 index 00000000..99edb899 --- /dev/null +++ b/scripts/lib/nginx-prune-cache-subscript.sh @@ -0,0 +1,30 @@ +#!/usr/local/bin/bash + +# This subscript is expected to be run inside docker container using 'bash' +# image. The image is based on Alpine Linux. It's tools (find, stat, awk, sort) +# are non-standard versions from BusyBox. + +MAX_CACHE_DIR_SIZE=20000000000 +MAX_KEEP_FILE_SIZE=1000000000 + +total=0 + +# We sort files by time, newest files are first. Format is: +# time (last modification as seconds since Epoch), filepath, size (bytes) +find /home/user/skynet-webportal/docker/data/nginx/cache -type f -exec stat -c "%Y %n %s" {} + | sort -rgk1 | while read line +do + size=$(echo $line | awk '{print $3}') + new_total=$(($total + $size)) + + # We always delete all files larger than MAX_KEEP_FILE_SIZE. + # We keep all files smaller than MAX_KEEP_FILE_SIZE when cache size is + # below MAX_CACHE_DIR_SIZE, then we delete also smaller files. + if (("$size" <= "$MAX_KEEP_FILE_SIZE" && "$new_total" < "$MAX_CACHE_DIR_SIZE")) + then + total=$new_total + continue + fi + + filename=$(echo $line | awk '{print $2}') + rm $filename +done diff --git a/scripts/nginx-prune.sh b/scripts/nginx-prune.sh new file mode 100755 index 00000000..f67d29e7 --- /dev/null +++ b/scripts/nginx-prune.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +# We execute the nginx cache pruning subscript from docker container so that we +# can run the pruning script in user crontab without sudo. + +docker run --rm -v /home/user:/home/user bash /home/user/skynet-webportal/scripts/lib/nginx-prune-cache-subscript.sh diff --git a/setup-scripts/health-checker.py b/setup-scripts/health-checker.py index b66459a8..091ebe30 100755 --- a/setup-scripts/health-checker.py +++ b/setup-scripts/health-checker.py @@ -226,7 +226,8 @@ async def check_health(): message += "{}/{} CRITICAL checks failed over the last {} hours! ".format( critical_checks_failed, critical_checks_total, CHECK_HOURS ) - force_notify = True + # Disabling as it creates notification fatigue. + # force_notify = True else: message += "All {} critical checks passed. ".format(critical_checks_total) @@ -234,7 +235,8 @@ async def check_health(): message += "{}/{} extended checks failed over the last {} hours! ".format( extended_checks_failed, extended_checks_total, CHECK_HOURS ) - force_notify = True + # Disabling as it creates notification fatigue. + # force_notify = True else: message += "All {} extended checks passed. ".format(extended_checks_total) diff --git a/setup-scripts/serverload.service b/setup-scripts/serverload.service new file mode 100644 index 00000000..5d6a41d4 --- /dev/null +++ b/setup-scripts/serverload.service @@ -0,0 +1,8 @@ +[Unit] +Description=Ensure serverload script is running to provide serverload stats. + +[Service] +ExecStart=/bin/bash /home/user/skynet-webportal/serverload.sh + +[Install] +WantedBy=multi-user.target diff --git a/setup-scripts/serverload.sh b/setup-scripts/serverload.sh new file mode 100755 index 00000000..6945bcb0 --- /dev/null +++ b/setup-scripts/serverload.sh @@ -0,0 +1,55 @@ +#!/bin/bash + +: ' +This script writes the CPU usage and the free disk space to a file in a loop. +The results are prepended to the file, so the most recent results are at the +top. This is so that the most recent information can easily be read from the +top of the file and the file can easily be truncated if needed. + +This script is run by the serverload.service systemd process. The +serverload.service file should be copied to +/etc/systemd/system/serverload.service. + +The systemd process can then be started with the following commands: +sudo systemctl start serverload.service + +The status of the process can be checked with: +sudo systemctl is-active serverload.service +' + +# Define Loop Interval +loop_interval=60 +webportal_repo_setup_scripts="/home/user/skynet-webportal/setup-scripts" +logfile_name="serverload.log" +logfile=$webportal_repo_setup_scripts/$logfile_name +jsonfile="serverload.json" +nginx_docker_path="/usr/local/share" + +# Create logfile if it doesn't exist +if [[ ! -e $logfile ]]; then + echo "init" > $logfile +fi + +# Write the output in an infinite loop. +while true; do + # CPU usage + cpu=$(echo $[100-$(vmstat 1 2|tail -1|awk '{print $15}')]) + sed -i "1iCPU: ${cpu}" $logfile + + # Disk Usage + disk=$(df -Ph . | tail -1 | awk '{print $4}') + sed -i "1iDISK: ${disk}" $logfile + + # Write the timestamp + timestamp=$(date) + sed -i "1iTIMESTAMP: ${timestamp}" $logfile + + # Write and copy a json file of the latest results to nginx docker container + # to serve + printf '{"cpu":"%s","disk":"%s","timestamp":"%s"}' "$cpu" "$disk" "$timestamp" > $webportal_repo_setup_scripts/$jsonfile + docker cp $webportal_repo_setup_scripts/$jsonfile nginx:$nginx_docker_path/$jsonfile + + # Sleep + sleep $loop_interval +done + diff --git a/setup-scripts/support/crontab b/setup-scripts/support/crontab index ad766264..29c8ec1a 100644 --- a/setup-scripts/support/crontab +++ b/setup-scripts/support/crontab @@ -4,3 +4,4 @@ 30 */4 * * * /home/user/skynet-webportal/setup-scripts/blocklist-airtable.py /home/user/skynet-webportal/.env 0 4 * * * /home/user/skynet-webportal/scripts/db_backup.sh 1 >> /home/user/skynet-webportal/logs/db_backup_`date +"%Y-%m-%d-%H%M"`.log 2 > &1 0 5 * * * /home/user/skynet-webportal/scripts/es_cleaner.py 1 http://localhost:9200 +15 * * * * /home/user/skynet-webportal/scripts/nginx-prune.sh