Merge pull request #346 from NebulousLabs/gollum_docker_logs
Gollum will now check docker logs instead of the journal.
This commit is contained in:
commit
2554bc6238
|
@ -104,8 +104,8 @@ At this point we have almost everything running, we just need to set up your wal
|
||||||
> `docker exec caddy caddy reload --config /etc/caddy/Caddyfile`
|
> `docker exec caddy caddy reload --config /etc/caddy/Caddyfile`
|
||||||
- Restarting nginx gracefully after making changes to nginx configs
|
- Restarting nginx gracefully after making changes to nginx configs
|
||||||
> `docker exec nginx openresty -s reload`
|
> `docker exec nginx openresty -s reload`
|
||||||
- Checking siad service logs (follow last 50 lines)
|
- Checking siad service logs (last hour)
|
||||||
> `journalctl -f -n 50 --user-unit siad`
|
> `docker logs --since 1h $(docker ps -q --filter "name=^sia$")`
|
||||||
- Checking caddy logs (for example in case ssl certificate fails)
|
- Checking caddy logs (for example in case ssl certificate fails)
|
||||||
> `docker logs caddy -f`
|
> `docker logs caddy -f`
|
||||||
- Checking nginx logs (nginx handles all communication to siad instances)
|
- Checking nginx logs (nginx handles all communication to siad instances)
|
||||||
|
|
|
@ -5,15 +5,22 @@ health-checker runs simple health checks on a portal node using the siad API and
|
||||||
dispatches messages to a Discord channel.
|
dispatches messages to a Discord channel.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import discord, traceback
|
import discord, traceback, asyncio
|
||||||
from bot_utils import setup, send_msg, siad, sc_precision
|
from bot_utils import setup, send_msg, siad, sc_precision
|
||||||
|
|
||||||
bot_token = setup()
|
bot_token = setup()
|
||||||
client = discord.Client()
|
client = discord.Client()
|
||||||
|
|
||||||
|
|
||||||
|
async def exit_after(delay):
|
||||||
|
await asyncio.sleep(delay)
|
||||||
|
exit(0)
|
||||||
|
|
||||||
|
|
||||||
@client.event
|
@client.event
|
||||||
async def on_ready():
|
async def on_ready():
|
||||||
await run_checks()
|
await run_checks()
|
||||||
|
asyncio.create_task(exit_after(30))
|
||||||
await client.close()
|
await client.close()
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -1,82 +1,91 @@
|
||||||
#!/usr/bin/env python3
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
import discord, sys, traceback, io
|
import discord, sys, traceback, io, os, asyncio
|
||||||
from bot_utils import setup, send_msg, sc_precision
|
from bot_utils import setup, send_msg
|
||||||
|
|
||||||
from datetime import datetime, timedelta
|
from datetime import datetime, timedelta
|
||||||
from subprocess import Popen, PIPE
|
from subprocess import Popen, PIPE
|
||||||
|
|
||||||
"""
|
"""
|
||||||
log-checker checks journal logs for siad.
|
log-checker checks the docker logs for siad.
|
||||||
|
|
||||||
Arguments:
|
Arguments:
|
||||||
1. path to a .env file (default is none so env variables can already be
|
1. path to a .env file (default is none so env variables can already be
|
||||||
preset)
|
preset)
|
||||||
|
|
||||||
2. systemd service name (default: "siad")
|
2. docker container name (default: "sia")
|
||||||
|
|
||||||
3. number of hours to look back in log (used as --since value in journalctl
|
3. number of hours to look back in log (default: 1 hour)
|
||||||
command) (default: 1 hour)
|
|
||||||
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
DEFAULT_CHECK_INTERVAL = timedelta(hours=1)
|
# The default check interval in hours.
|
||||||
|
DEFAULT_CHECK_INTERVAL = 1
|
||||||
|
|
||||||
bot_token = setup()
|
bot_token = setup()
|
||||||
client = discord.Client()
|
client = discord.Client()
|
||||||
|
|
||||||
|
|
||||||
|
async def exit_after(delay):
|
||||||
|
await asyncio.sleep(delay)
|
||||||
|
exit(0)
|
||||||
|
|
||||||
|
|
||||||
@client.event
|
@client.event
|
||||||
async def on_ready():
|
async def on_ready():
|
||||||
await run_checks()
|
await run_checks()
|
||||||
|
asyncio.create_task(exit_after(30))
|
||||||
await client.close()
|
await client.close()
|
||||||
|
|
||||||
|
|
||||||
async def run_checks():
|
async def run_checks():
|
||||||
print("Running Skynet portal log checks")
|
print("Running Skynet portal log checks")
|
||||||
try:
|
try:
|
||||||
await check_journal()
|
await check_docker_logs()
|
||||||
|
|
||||||
except: # catch all exceptions
|
except: # catch all exceptions
|
||||||
trace = traceback.format_exc()
|
trace = traceback.format_exc()
|
||||||
await send_msg(client, "```\n{}\n```".format(trace), force_notify=False)
|
await send_msg(client, "```\n{}\n```".format(trace), force_notify=False)
|
||||||
|
|
||||||
|
|
||||||
# check_journal checks the journal
|
# check_docker_logs checks the docker logs by filtering on the docker container name
|
||||||
async def check_journal():
|
async def check_docker_logs():
|
||||||
print("\nChecking journal...")
|
print("\nChecking docker logs...")
|
||||||
|
|
||||||
# Get the systemd service name as an argument, or use "siad" as default.
|
# Get the container name as an argument or use "sia" as default.
|
||||||
service_name = "siad"
|
container_name = "sia"
|
||||||
if len(sys.argv) > 2:
|
if len(sys.argv) > 2:
|
||||||
service_name = sys.argv[2]
|
container_name = sys.argv[2]
|
||||||
|
|
||||||
# Get the systemd service name as an argument, or use "siad" as default.
|
# Get the container id for siad.
|
||||||
check_interval = DEFAULT_CHECK_INTERVAL
|
stream = os.popen('docker ps -q --filter name=^{}$'.format(container_name))
|
||||||
|
image_id = stream.read().strip()
|
||||||
|
|
||||||
|
# Get the number of hours to look back in the logs or use 1 as default.
|
||||||
|
check_hours = DEFAULT_CHECK_INTERVAL
|
||||||
if len(sys.argv) > 3:
|
if len(sys.argv) > 3:
|
||||||
check_interval = timedelta(hours=int(sys.argv[3]))
|
check_hours = int(sys.argv[3])
|
||||||
|
|
||||||
now = datetime.now()
|
now = datetime.now()
|
||||||
time = now - check_interval
|
time = now - timedelta(hours=check_hours)
|
||||||
time_string = "{}-{}-{} {}:{}:{}".format(time.year, time.month, time.day, time.hour, time.minute, time.second)
|
time_string = "{}h".format(check_hours)
|
||||||
|
|
||||||
# Open the journal.
|
# Read the logs.
|
||||||
proc = Popen(["journalctl", "--user-unit", service_name, "--since", time_string], stdin=PIPE, stdout=PIPE, stderr=PIPE, text=True)
|
proc = Popen(["docker", "logs", "--since", time_string, image_id], stdin=PIPE, stdout=PIPE, stderr=PIPE, text=True)
|
||||||
std_out, std_err = proc.communicate()
|
std_out, std_err = proc.communicate()
|
||||||
|
|
||||||
if len(std_err) > 0:
|
if len(std_err) > 0:
|
||||||
await send_msg(client, "Error reading journalctl output: {}".format(std_err), force_notify=True)
|
await send_msg(client, "Error reading docker logs output: {}".format(std_err), force_notify=True)
|
||||||
return
|
return
|
||||||
|
|
||||||
# If there are any critical errors, upload the whole log file.
|
# If there are any critical errors, upload the whole log file.
|
||||||
if "Critical" in std_out or "panic" in std_out:
|
if "Critical" in std_out or "panic" in std_out:
|
||||||
upload_name = "{}-{}-{}-{}-{}:{}:{}.log".format(service_name, time.year, time.month, time.day, time.hour, time.minute, time.second)
|
upload_name = "{}-{}-{}-{}-{}:{}:{}.log".format(container_name, time.year, time.month, time.day, time.hour, time.minute, time.second)
|
||||||
await send_msg(client, "Critical error found in log!", file=discord.File(io.BytesIO(std_out.encode()), filename=upload_name), force_notify=True)
|
await send_msg(client, "Critical error found in log!", file=discord.File(io.BytesIO(std_out.encode()), filename=upload_name), force_notify=True)
|
||||||
return
|
return
|
||||||
|
|
||||||
# No critical errors, return a heartbeat type messagej
|
# No critical errors, return a heartbeat type message
|
||||||
pretty_before = time.strftime("%I:%M%p")
|
pretty_before = time.strftime("%I:%M%p")
|
||||||
pretty_now = now.strftime("%I:%M%p")
|
pretty_now = now.strftime("%I:%M%p")
|
||||||
await send_msg(client, "No critical warnings in log from `{}` to `{}`".format(pretty_before, pretty_now))
|
await send_msg(client, "No critical warnings in log from `{}` to `{}`".format(pretty_before, pretty_now))
|
||||||
|
|
||||||
|
|
||||||
client.run(bot_token)
|
client.run(bot_token)
|
||||||
|
|
|
@ -17,16 +17,19 @@ sudo curl -L "https://github.com/docker/compose/releases/download/1.25.5/docker-
|
||||||
sudo chmod +x /usr/local/bin/docker-compose
|
sudo chmod +x /usr/local/bin/docker-compose
|
||||||
docker-compose --version # sanity check
|
docker-compose --version # sanity check
|
||||||
|
|
||||||
# Create dummy .env file for docker-compose usage with veriables
|
# Create dummy .env file for docker-compose usage with variables
|
||||||
# * DOMAIN_NAME - the domain name your server is using, i.e. example.com
|
# * DOMAIN_NAME - the domain name your server is using, i.e. example.com
|
||||||
# * EMAIL_ADDRESS - this is the administrator contact email you need to supply for communication regarding SSL certification
|
# * EMAIL_ADDRESS - this is the administrator contact email you need to supply for communication regarding SSL certification
|
||||||
# * HSD_API_KEY - this is an auto-generated secure key for your handshake service integration
|
# * HSD_API_KEY - this is an auto-generated secure key for your handshake service integration
|
||||||
# * CLOUDFLARE_AUTH_TOKEN - (optional) if using cloudflare as dns loadbalancer (need to change it in Caddyfile too)
|
# * CLOUDFLARE_AUTH_TOKEN - (optional) if using cloudflare as dns loadbalancer (need to change it in Caddyfile too)
|
||||||
# * AWS_ACCESS_KEY_ID - (optional) if using route53 as a dns loadbalancer
|
# * AWS_ACCESS_KEY_ID - (optional) if using route53 as a dns loadbalancer
|
||||||
# * AWS_SECRET_ACCESS_KEY - (optional) if using route53 as a dns loadbalancer
|
# * AWS_SECRET_ACCESS_KEY - (optional) if using route53 as a dns loadbalancer
|
||||||
|
# * API_PORT - (optional) the port on which siad is listening, defaults to 9980
|
||||||
|
# * PORTAL_NAME - the name of the portal, required by the discord bot
|
||||||
|
# * DISCORD_BOT_TOKEN - required by the discord bot
|
||||||
if ! [ -f /home/user/skynet-webportal/.env ]; then
|
if ! [ -f /home/user/skynet-webportal/.env ]; then
|
||||||
HSD_API_KEY=$(openssl rand -base64 32) # generate safe random key for handshake
|
HSD_API_KEY=$(openssl rand -base64 32) # generate safe random key for handshake
|
||||||
printf "DOMAIN_NAME=example.com\nEMAIL_ADDRESS=email@example.com\nSIA_WALLET_PASSWORD=\nHSD_API_KEY=${HSD_API_KEY}\nCLOUDFLARE_AUTH_TOKEN=\nAWS_ACCESS_KEY_ID=\nAWS_SECRET_ACCESS_KEY=\n" > /home/user/skynet-webportal/.env
|
printf "DOMAIN_NAME=example.com\nEMAIL_ADDRESS=email@example.com\nSIA_WALLET_PASSWORD=\nHSD_API_KEY=${HSD_API_KEY}\nCLOUDFLARE_AUTH_TOKEN=\nAWS_ACCESS_KEY_ID=\nAWS_SECRET_ACCESS_KEY=\nPORTAL_NAME=\nDISCORD_BOT_TOKEN=\n" > /home/user/skynet-webportal/.env
|
||||||
fi
|
fi
|
||||||
|
|
||||||
# Start docker container with nginx and client
|
# Start docker container with nginx and client
|
||||||
|
|
|
@ -8,8 +8,8 @@ sudo apt-get -y install python3-pip
|
||||||
pip3 install discord.py
|
pip3 install discord.py
|
||||||
pip3 install python-dotenv
|
pip3 install python-dotenv
|
||||||
|
|
||||||
fundsCheck="0 0,8,16 * * * /home/user/skynet-webportal/setup-scripts/funds-checker.py /home/user/.sia/sia.env"
|
fundsCheck="0 0,8,16 * * * /home/user/skynet-webportal/setup-scripts/funds-checker.py /home/user/skynet-webportal/.env"
|
||||||
logsCheck="0 0,8,16 * * * /home/user/skynet-webportal/setup-scripts/log-checker.py /home/user/.sia/sia.env siad 8"
|
logsCheck="0 0,8,16 * * * /home/user/skynet-webportal/setup-scripts/log-checker.py /home/user/skynet-webportal/.env sia 8"
|
||||||
|
|
||||||
(crontab -u user -l; echo "$fundsCheck" ) | crontab -u user -
|
(crontab -u user -l; echo "$fundsCheck" ) | crontab -u user -
|
||||||
(crontab -u user -l; echo "$logsCheck" ) | crontab -u user -
|
(crontab -u user -l; echo "$logsCheck" ) | crontab -u user -
|
||||||
|
|
Reference in New Issue