Merge branch 'master' into accounts-api-refactor

# Conflicts:
#	.gitignore
This commit is contained in:
Ivaylo Novakov 2021-12-09 14:03:59 +01:00
commit a4242a9234
No known key found for this signature in database
GPG Key ID: 06B9354AB08BE9C6
11 changed files with 141 additions and 2 deletions

14
.gitignore vendored
View File

@ -85,3 +85,17 @@ docker/nginx/conf.d/server-override/*
__pycache__
/.idea/
/venv*
# CockroachDB certificates
docker/cockroach/certs/*.crt
docker/cockroach/certs/*.key
docker/kratos/cr_certs/*.crt
docker/kratos/cr_certs/*.key
# Oathkeeper JWKS signing token
docker/kratos/oathkeeper/id_token.jwks.json
docker/kratos/config/kratos.yml
# Setup-script log files
setup-scripts/serverload.log
setup-scripts/serverload.json

View File

@ -0,0 +1 @@
- Add `/serverload` endpoint for CPU usage and free disk space

View File

@ -0,0 +1 @@
- Added script to prune nginx cache.

View File

@ -71,6 +71,21 @@ location /skynet/stats {
proxy_pass http://sia:9980/skynet/stats;
}
# Server load endpoint: serves the static JSON snapshot with CPU usage and
# free disk space that setup-scripts/serverload.sh copies into this container.
location /serverload {
# Directory inside the nginx container that holds serverload.json
root /usr/local/share;
# Apply the shared CORS include, as the neighbouring routes do
include /etc/nginx/conf.d/include/cors;
# The file is served for an extension-less URI, so declare the JSON content
# type explicitly
default_type 'application/json';
# Try the URI as-is first, then with ".json" appended so that /serverload
# resolves to /serverload.json; respond with 404 when neither file exists
try_files $uri $uri.json =404;
}
location /skynet/health {
include /etc/nginx/conf.d/include/cors;

View File

@ -29,6 +29,12 @@ the health check.
The `portal-upgrade.sh` script upgrades the docker images for a portal and
clears any leftover images.
**nginx-prune.sh**\
The `nginx-prune.sh` script deletes all entries from nginx cache larger than
the given size and smaller entries until nginx cache disk size is smaller than
the given cache size limit. Both values are configured in
`lib/nginx-prune-cache-subscript.sh`. The script doesn't require `sudo`.
## Webportal Upgrade Procedures
TODO...

View File

@ -0,0 +1,30 @@
#!/usr/local/bin/bash

# This subscript is expected to be run inside a docker container using the
# 'bash' image. The image is based on Alpine Linux. Its tools (find, stat,
# sort) are non-standard versions from BusyBox.
#
# It prunes the nginx cache directory:
#   - every file larger than MAX_KEEP_FILE_SIZE is always deleted;
#   - smaller files are kept, newest first, while the running total of kept
#     bytes stays below MAX_CACHE_DIR_SIZE; the remaining files are deleted.

# Cache size limit in bytes.
MAX_CACHE_DIR_SIZE=20000000000
# Largest single file (bytes) that is allowed to stay in the cache.
MAX_KEEP_FILE_SIZE=1000000000
# Location of the nginx cache as seen from inside the container.
CACHE_DIR=/home/user/skynet-webportal/docker/data/nginx/cache

#######################################
# Prune a cache directory.
# Arguments:
#   $1 - cache directory            (default: CACHE_DIR)
#   $2 - cache size limit in bytes  (default: MAX_CACHE_DIR_SIZE)
#   $3 - max kept file size, bytes  (default: MAX_KEEP_FILE_SIZE)
# Returns:
#   0; a missing directory is reported on stderr and treated as nothing to do.
#######################################
prune_cache() {
  local cache_dir=${1:-$CACHE_DIR}
  local max_dir_size=${2:-$MAX_CACHE_DIR_SIZE}
  local max_file_size=${3:-$MAX_KEEP_FILE_SIZE}
  local total=0 new_total size path

  if [[ ! -d "$cache_dir" ]]; then
    printf 'nginx-prune: cache directory not found: %s\n' "$cache_dir" >&2
    return 0
  fi

  # Records are "mtime size path", sorted by mtime with the newest first.
  # The path is the LAST field so that `read` puts the whole remainder of the
  # line into it; paths containing spaces are therefore handled correctly.
  find "$cache_dir" -type f -exec stat -c '%Y %s %n' {} + \
    | sort -rn -k1,1 \
    | while IFS=' ' read -r _ size path; do
        # Skip anything that is not a well-formed record.
        [[ "$size" =~ ^[0-9]+$ && -n "$path" ]] || continue

        new_total=$((total + size))
        # Keep the file only if it is small enough AND still fits the budget.
        if (( size <= max_file_size && new_total < max_dir_size )); then
          total=$new_total
          continue
        fi

        rm -- "$path"
      done
}

prune_cache "$@"

6
scripts/nginx-prune.sh Executable file
View File

@ -0,0 +1,6 @@
#!/bin/bash

# Entry point for pruning the nginx cache from the user crontab.
# The actual pruning subscript is run inside a throw-away 'bash' docker
# container, which is what lets this script work without sudo.

host_home="/home/user"
prune_subscript="${host_home}/skynet-webportal/scripts/lib/nginx-prune-cache-subscript.sh"

# Mount the home directory at the same path inside the container so that the
# subscript and the cache paths it uses resolve identically.
docker run --rm -v "${host_home}:${host_home}" bash "${prune_subscript}"

View File

@ -226,7 +226,8 @@ async def check_health():
message += "{}/{} CRITICAL checks failed over the last {} hours! ".format(
critical_checks_failed, critical_checks_total, CHECK_HOURS
)
force_notify = True
# Disabling as it creates notification fatigue.
# force_notify = True
else:
message += "All {} critical checks passed. ".format(critical_checks_total)
@ -234,7 +235,8 @@ async def check_health():
message += "{}/{} extended checks failed over the last {} hours! ".format(
extended_checks_failed, extended_checks_total, CHECK_HOURS
)
force_notify = True
# Disabling as it creates notification fatigue.
# force_notify = True
else:
message += "All {} extended checks passed. ".format(extended_checks_total)

View File

@ -0,0 +1,8 @@
[Unit]
Description=Ensure serverload script is running to provide serverload stats.

[Service]
# serverload.sh lives in the setup-scripts directory of the webportal repo.
ExecStart=/bin/bash /home/user/skynet-webportal/setup-scripts/serverload.sh

[Install]
WantedBy=multi-user.target

55
setup-scripts/serverload.sh Executable file
View File

@ -0,0 +1,55 @@
#!/bin/bash

# This script writes the CPU usage and the free disk space to a file in a loop.
#
# The results are prepended to the file, so the most recent results are at the
# top. This is so that the most recent information can easily be read from the
# top of the file and the file can easily be truncated if needed.
#
# This script is run by the serverload.service systemd process. The
# serverload.service file should be copied to
# /etc/systemd/system/serverload.service.
#
# The systemd process can then be started with the following command:
#   sudo systemctl start serverload.service
#
# The status of the process can be checked with:
#   sudo systemctl is-active serverload.service

# Define Loop Interval (seconds)
loop_interval=60

webportal_repo_setup_scripts="/home/user/skynet-webportal/setup-scripts"
logfile_name="serverload.log"
logfile="${webportal_repo_setup_scripts}/${logfile_name}"
jsonfile="serverload.json"
nginx_docker_path="/usr/local/share"

# Write the output in an infinite loop.
while true; do
  # `sed 1i` only inserts when the file has a first line, so seed the logfile
  # whenever it is missing OR empty (e.g. after being truncated).
  if [[ ! -s "$logfile" ]]; then
    echo "init" > "$logfile"
  fi

  # CPU usage: 100 minus the idle percentage (column 15) of the second vmstat
  # sample.
  idle=$(vmstat 1 2 | tail -1 | awk '{print $15}')
  if [[ "$idle" =~ ^[0-9]+$ ]]; then
    cpu=$((100 - idle))
  else
    # Do not feed garbage into the arithmetic expansion; record an empty value.
    printf 'serverload: could not read idle CPU from vmstat\n' >&2
    cpu=""
  fi

  # Disk Usage: available space (column 4) of the filesystem holding the
  # current working directory.
  disk=$(df -Ph . | tail -1 | awk '{print $4}')

  # Timestamp of this sample
  timestamp=$(date)

  # Prepend the sample in a single in-place edit. The resulting order at the
  # top of the file is TIMESTAMP, DISK, CPU.
  sed -i \
    -e "1iTIMESTAMP: ${timestamp}" \
    -e "1iDISK: ${disk}" \
    -e "1iCPU: ${cpu}" \
    "$logfile"

  # Write and copy a json file of the latest results to nginx docker container
  # to serve
  printf '{"cpu":"%s","disk":"%s","timestamp":"%s"}' "$cpu" "$disk" "$timestamp" \
    > "${webportal_repo_setup_scripts}/${jsonfile}"
  docker cp "${webportal_repo_setup_scripts}/${jsonfile}" "nginx:${nginx_docker_path}/${jsonfile}"

  # Sleep
  sleep "$loop_interval"
done

View File

@ -4,3 +4,4 @@
30 */4 * * * /home/user/skynet-webportal/setup-scripts/blocklist-airtable.py /home/user/skynet-webportal/.env
# A bare '%' in a crontab command is turned into a newline by cron, so the
# date format below must escape every '%'.
0 4 * * * /home/user/skynet-webportal/scripts/db_backup.sh 1 >> /home/user/skynet-webportal/logs/db_backup_`date +"\%Y-\%m-\%d-\%H\%M"`.log 2>&1
0 5 * * * /home/user/skynet-webportal/scripts/es_cleaner.py 1 http://localhost:9200
15 * * * * /home/user/skynet-webportal/scripts/nginx-prune.sh