Merge pull request #361 from NebulousLabs/ivo/ensure_the_process_exits
Ensure the log checker process exits
This commit is contained in:
commit
bf72a7e0c2
|
@ -78,4 +78,4 @@ docker/data
|
||||||
# Cache files
|
# Cache files
|
||||||
__pycache__
|
__pycache__
|
||||||
/.idea/
|
/.idea/
|
||||||
/venv/
|
/venv*
|
||||||
|
|
|
@ -5,7 +5,7 @@ health-checker runs simple health checks on a portal node using the siad API and
|
||||||
dispatches messages to a Discord channel.
|
dispatches messages to a Discord channel.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import discord, traceback, asyncio
|
import discord, traceback, asyncio, os
|
||||||
from bot_utils import setup, send_msg, siad, sc_precision
|
from bot_utils import setup, send_msg, siad, sc_precision
|
||||||
|
|
||||||
bot_token = setup()
|
bot_token = setup()
|
||||||
|
@ -14,13 +14,13 @@ client = discord.Client()
|
||||||
|
|
||||||
async def exit_after(delay):
|
async def exit_after(delay):
|
||||||
await asyncio.sleep(delay)
|
await asyncio.sleep(delay)
|
||||||
exit(0)
|
os._exit(0)
|
||||||
|
|
||||||
|
|
||||||
@client.event
|
@client.event
|
||||||
async def on_ready():
|
async def on_ready():
|
||||||
await run_checks()
|
await run_checks()
|
||||||
asyncio.create_task(exit_after(30))
|
asyncio.create_task(exit_after(3))
|
||||||
await client.close()
|
await client.close()
|
||||||
|
|
||||||
|
|
||||||
|
@ -75,4 +75,5 @@ async def check_health():
|
||||||
# Send an informational heartbeat if all checks passed.
|
# Send an informational heartbeat if all checks passed.
|
||||||
await send_msg(client, "Health checks passed:\n{} \n{}".format(balance_msg, alloc_msg))
|
await send_msg(client, "Health checks passed:\n{} \n{}".format(balance_msg, alloc_msg))
|
||||||
|
|
||||||
|
|
||||||
client.run(bot_token)
|
client.run(bot_token)
|
||||||
|
|
|
@ -28,14 +28,13 @@ client = discord.Client()
|
||||||
# exit_after kills the script if it hasn't exited on its own after `delay` seconds
|
# exit_after kills the script if it hasn't exited on its own after `delay` seconds
|
||||||
async def exit_after(delay):
|
async def exit_after(delay):
|
||||||
await asyncio.sleep(delay)
|
await asyncio.sleep(delay)
|
||||||
exit(0)
|
os._exit(0)
|
||||||
|
|
||||||
|
|
||||||
@client.event
|
@client.event
|
||||||
async def on_ready():
|
async def on_ready():
|
||||||
await run_checks()
|
await run_checks()
|
||||||
asyncio.create_task(exit_after(30))
|
asyncio.create_task(exit_after(3))
|
||||||
await client.close()
|
|
||||||
|
|
||||||
|
|
||||||
async def run_checks():
|
async def run_checks():
|
||||||
|
@ -71,12 +70,6 @@ async def check_docker_logs():
|
||||||
if len(sys.argv) > 2:
|
if len(sys.argv) > 2:
|
||||||
container_name = sys.argv[2]
|
container_name = sys.argv[2]
|
||||||
|
|
||||||
# Get the container id for siad.
|
|
||||||
cmd = 'docker ps -q --filter name=^{}$'.format(container_name)
|
|
||||||
print("[DEBUG] will run `{}`".format(cmd))
|
|
||||||
stream = os.popen(cmd)
|
|
||||||
image_id = stream.read().strip()
|
|
||||||
|
|
||||||
# Get the number of hours to look back in the logs or use 1 as default.
|
# Get the number of hours to look back in the logs or use 1 as default.
|
||||||
check_hours = DEFAULT_CHECK_INTERVAL
|
check_hours = DEFAULT_CHECK_INTERVAL
|
||||||
if len(sys.argv) > 3:
|
if len(sys.argv) > 3:
|
||||||
|
@ -87,8 +80,8 @@ async def check_docker_logs():
|
||||||
time_string = "{}h".format(check_hours)
|
time_string = "{}h".format(check_hours)
|
||||||
|
|
||||||
# Read the logs.
|
# Read the logs.
|
||||||
print("[DEBUG] Will run `docker logs --since {} {}`".format(time_string, image_id))
|
print("[DEBUG] Will run `docker logs --since {} {}`".format(time_string, container_name))
|
||||||
proc = Popen(["docker", "logs", "--since", time_string, image_id], stdin=PIPE, stdout=PIPE, stderr=PIPE, text=True)
|
proc = Popen(["docker", "logs", "--since", time_string, container_name], stdin=PIPE, stdout=PIPE, stderr=PIPE, text=True)
|
||||||
std_out, std_err = proc.communicate()
|
std_out, std_err = proc.communicate()
|
||||||
|
|
||||||
if len(std_err) > 0:
|
if len(std_err) > 0:
|
||||||
|
@ -109,7 +102,7 @@ async def check_docker_logs():
|
||||||
return
|
return
|
||||||
|
|
||||||
# If there are any critical errors, upload the whole log file.
|
# If there are any critical errors, upload the whole log file.
|
||||||
if "Critical" in std_out or "panic" in std_out:
|
if 'Critical' in std_out or 'panic' in std_out:
|
||||||
upload_name = "{}-{}-{}-{}-{}:{}:{}.log".format(container_name, time.year, time.month, time.day, time.hour, time.minute, time.second)
|
upload_name = "{}-{}-{}-{}-{}:{}:{}.log".format(container_name, time.year, time.month, time.day, time.hour, time.minute, time.second)
|
||||||
await send_msg(client, "Critical error found in log!", file=discord.File(io.BytesIO(std_out.encode()), filename=upload_name), force_notify=True)
|
await send_msg(client, "Critical error found in log!", file=discord.File(io.BytesIO(std_out.encode()), filename=upload_name), force_notify=True)
|
||||||
return
|
return
|
||||||
|
@ -119,4 +112,5 @@ async def check_docker_logs():
|
||||||
pretty_now = now.strftime("%I:%M%p")
|
pretty_now = now.strftime("%I:%M%p")
|
||||||
await send_msg(client, "No critical warnings in log from `{}` to `{}`".format(pretty_before, pretty_now))
|
await send_msg(client, "No critical warnings in log from `{}` to `{}`".format(pretty_before, pretty_now))
|
||||||
|
|
||||||
|
|
||||||
client.run(bot_token)
|
client.run(bot_token)
|
||||||
|
|
Reference in New Issue