From 9ebaddc47686c1595c46b68540474163ac7bd255 Mon Sep 17 00:00:00 2001
From: Ivaylo Novakov
Date: Wed, 6 Oct 2021 14:36:46 +0200
Subject: [PATCH] Merge pull request #1260 from SkynetLabs/fix-health-check-script

fix health check script invalid syntax
---
 .github/workflows/python-lint.yml   |  37 +++++++++
 scripts/es_cleaner.py               | 119 +++++++++++++++++++---------
 setup-scripts/blocklist-airtable.py |  90 ++++++++++++++++-----
 setup-scripts/bot_utils.py          |  16 +++-
 setup-scripts/funds-checker.py      |   4 +-
 setup-scripts/health-checker.py     |  21 +++--
 setup-scripts/log-checker.py        |   7 +-
 7 files changed, 223 insertions(+), 71 deletions(-)
 create mode 100644 .github/workflows/python-lint.yml

diff --git a/.github/workflows/python-lint.yml b/.github/workflows/python-lint.yml
new file mode 100644
index 00000000..71cb6938
--- /dev/null
+++ b/.github/workflows/python-lint.yml
@@ -0,0 +1,37 @@
+name: Python Lint
+
+on:
+  push:
+    paths:
+      - "**.py"
+
+jobs:
+  black:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-python@v2
+        with:
+          python-version: "3.x"
+          architecture: x64
+
+      - run: pip install black
+      - run: black --check .
+
+  flake8:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v2
+      - uses: actions/setup-python@v2
+        with:
+          python-version: "3.x"
+          architecture: x64
+
+      - run: pip install flake8
+
+      # E203: https://www.flake8rules.com/rules/E203.html - Whitespace before ':'
+      # E501: https://www.flake8rules.com/rules/E501.html - Line too long
+      # W503: https://www.flake8rules.com/rules/W503.html - Line break occurred before a binary operator
+      # W605: https://www.flake8rules.com/rules/W605.html - Invalid escape sequence
+      # E722: https://www.flake8rules.com/rules/E722.html - Do not use bare except, specify exception instead
+      - run: flake8 --max-line-length 88 --ignore E203,E501,W503,W605,E722
diff --git a/scripts/es_cleaner.py b/scripts/es_cleaner.py
index 336704cf..48f2a0f3 100755
--- a/scripts/es_cleaner.py
+++ b/scripts/es_cleaner.py
@@ -6,45 +6,72 @@ import os
 import ssl
 import sys
 
-TIMEOUT=120
+TIMEOUT = 120
+
 
 def main():
     if len(sys.argv) != 3:
-        print('USAGE: [INDEX_PREFIX=(default "")] [ARCHIVE=(default false)] ... {} NUM_OF_DAYS http://HOSTNAME[:PORT]'.format(sys.argv[0]))
-        print('NUM_OF_DAYS ... delete indices that are older than the given number of days.')
-        print('HOSTNAME ... specifies which Elasticsearch hosts URL to search and delete indices from.')
-        print('TIMEOUT ... number of seconds to wait for master node response.'.format(TIMEOUT))
-        print('INDEX_PREFIX ... specifies index prefix.')
-        print('INDEX_DATE_SEPARATOR ... specifies index date separator.')
-        print('ARCHIVE ... specifies whether to remove archive indices (only works for rollover) (default false).')
-        print('ROLLOVER ... specifies whether to remove indices created by rollover (default false).')
-        print('ES_USERNAME ... The username required by Elasticsearch.')
-        print('ES_PASSWORD ... The password required by Elasticsearch.')
-        print('ES_TLS ... enable TLS (default false).')
-        print('ES_TLS_CA ... Path to TLS CA file.')
-        print('ES_TLS_CERT ... Path to TLS certificate file.')
-        print('ES_TLS_KEY ... Path to TLS key file.')
-        print('ES_TLS_SKIP_HOST_VERIFY ... (insecure) Skip server\'s certificate chain and host name verification.')
+        print(
+            'USAGE: [INDEX_PREFIX=(default "")] [ARCHIVE=(default false)] ... {} NUM_OF_DAYS http://HOSTNAME[:PORT]'.format(
+                sys.argv[0]
+            )
+        )
+        print(
+            "NUM_OF_DAYS ... delete indices that are older than the given number of days."
+        )
+        print(
+            "HOSTNAME ... specifies which Elasticsearch hosts URL to search and delete indices from."
+        )
+        print(
+            "TIMEOUT ... number of seconds to wait for master node response, default: {}".format(
+                TIMEOUT
+            )
+        )
+        print("INDEX_PREFIX ... specifies index prefix.")
+        print("INDEX_DATE_SEPARATOR ... specifies index date separator.")
+        print(
+            "ARCHIVE ... specifies whether to remove archive indices (only works for rollover) (default false)."
+        )
+        print(
+            "ROLLOVER ... specifies whether to remove indices created by rollover (default false)."
+        )
+        print("ES_USERNAME ... The username required by Elasticsearch.")
+        print("ES_PASSWORD ... The password required by Elasticsearch.")
+        print("ES_TLS ... enable TLS (default false).")
+        print("ES_TLS_CA ... Path to TLS CA file.")
+        print("ES_TLS_CERT ... Path to TLS certificate file.")
+        print("ES_TLS_KEY ... Path to TLS key file.")
+        print(
+            "ES_TLS_SKIP_HOST_VERIFY ... (insecure) Skip server's certificate chain and host name verification."
+        )
         sys.exit(1)
 
-    client = create_client(os.getenv("ES_USERNAME"), os.getenv("ES_PASSWORD"), str2bool(os.getenv("ES_TLS", 'false')), os.getenv("ES_TLS_CA"), os.getenv("ES_TLS_CERT"), os.getenv("ES_TLS_KEY"), str2bool(os.getenv("ES_TLS_SKIP_HOST_VERIFY", 'false')))
+    client = create_client(
+        os.getenv("ES_USERNAME"),
+        os.getenv("ES_PASSWORD"),
+        str2bool(os.getenv("ES_TLS", "false")),
+        os.getenv("ES_TLS_CA"),
+        os.getenv("ES_TLS_CERT"),
+        os.getenv("ES_TLS_KEY"),
+        str2bool(os.getenv("ES_TLS_SKIP_HOST_VERIFY", "false")),
+    )
     ilo = curator.IndexList(client)
-    empty_list(ilo, 'Elasticsearch has no indices')
+    empty_list(ilo, "Elasticsearch has no indices")
 
-    prefix = os.getenv("INDEX_PREFIX", '')
-    if prefix != '':
-        prefix += '-'
-    separator = os.getenv("INDEX_DATE_SEPARATOR", '-')
+    prefix = os.getenv("INDEX_PREFIX", "")
+    if prefix != "":
+        prefix += "-"
+    separator = os.getenv("INDEX_DATE_SEPARATOR", "-")
 
-    if str2bool(os.getenv("ARCHIVE", 'false')):
+    if str2bool(os.getenv("ARCHIVE", "false")):
         filter_archive_indices_rollover(ilo, prefix)
     else:
-        if str2bool(os.getenv("ROLLOVER", 'false')):
+        if str2bool(os.getenv("ROLLOVER", "false")):
            filter_main_indices_rollover(ilo, prefix)
         else:
            filter_main_indices(ilo, prefix, separator)
 
-    empty_list(ilo, 'No indices to delete')
+    empty_list(ilo, "No indices to delete")
 
     for index in ilo.working_list():
         print("Removing", index)
@@ -57,32 +84,50 @@ def filter_main_indices(ilo, prefix, separator):
     date_regex = "\d{4}" + separator + "\d{2}" + separator + "\d{2}"
     time_string = "%Y" + separator + "%m" + separator + "%d"
 
-    ilo.filter_by_regex(kind='regex', value=prefix + "jaeger-(span|service|dependencies)-" + date_regex)
+    ilo.filter_by_regex(
+        kind="regex", value=prefix + "jaeger-(span|service|dependencies)-" + date_regex
+    )
     empty_list(ilo, "No indices to delete")
     # This excludes archive index as we use source='name'
     # source `creation_date` would include archive index
-    ilo.filter_by_age(source='name', direction='older', timestring=time_string, unit='days', unit_count=int(sys.argv[1]))
+    ilo.filter_by_age(
+        source="name",
+        direction="older",
+        timestring=time_string,
+        unit="days",
+        unit_count=int(sys.argv[1]),
+    )
 
 
 def filter_main_indices_rollover(ilo, prefix):
-    ilo.filter_by_regex(kind='regex', value=prefix + "jaeger-(span|service)-\d{6}")
+    ilo.filter_by_regex(kind="regex", value=prefix + "jaeger-(span|service)-\d{6}")
     empty_list(ilo, "No indices to delete")
     # do not remove active write indices
-    ilo.filter_by_alias(aliases=[prefix + 'jaeger-span-write'], exclude=True)
+    ilo.filter_by_alias(aliases=[prefix + "jaeger-span-write"], exclude=True)
     empty_list(ilo, "No indices to delete")
-    ilo.filter_by_alias(aliases=[prefix + 'jaeger-service-write'], exclude=True)
+    ilo.filter_by_alias(aliases=[prefix + "jaeger-service-write"], exclude=True)
     empty_list(ilo, "No indices to delete")
-    ilo.filter_by_age(source='creation_date', direction='older', unit='days', unit_count=int(sys.argv[1]))
+    ilo.filter_by_age(
+        source="creation_date",
+        direction="older",
+        unit="days",
+        unit_count=int(sys.argv[1]),
+    )
 
 
 def filter_archive_indices_rollover(ilo, prefix):
     # Remove only rollover archive indices
     # Do not remove active write archive index
-    ilo.filter_by_regex(kind='regex', value=prefix + "jaeger-span-archive-\d{6}")
+    ilo.filter_by_regex(kind="regex", value=prefix + "jaeger-span-archive-\d{6}")
     empty_list(ilo, "No indices to delete")
-    ilo.filter_by_alias(aliases=[prefix + 'jaeger-span-archive-write'], exclude=True)
+    ilo.filter_by_alias(aliases=[prefix + "jaeger-span-archive-write"], exclude=True)
     empty_list(ilo, "No indices to delete")
-    ilo.filter_by_age(source='creation_date', direction='older', unit='days', unit_count=int(sys.argv[1]))
+    ilo.filter_by_age(
+        source="creation_date",
+        direction="older",
+        unit="days",
+        unit_count=int(sys.argv[1]),
+    )
 
 
 def empty_list(ilo, error_msg):
@@ -94,7 +139,7 @@ def empty_list(ilo, error_msg):
 
 
 def str2bool(v):
-    return v.lower() in ('true', '1')
+    return v.lower() in ("true", "1")
 
 
 def create_client(username, password, tls, ca, cert, key, skipHostVerify):
@@ -105,7 +150,9 @@ def create_client(username, password, tls, ca, cert, key, skipHostVerify):
             context.check_hostname = False
             context.verify_mode = ssl.CERT_NONE
     if username is not None and password is not None:
-        return elasticsearch.Elasticsearch(sys.argv[2:], http_auth=(username, password), ssl_context=context)
+        return elasticsearch.Elasticsearch(
+            sys.argv[2:], http_auth=(username, password), ssl_context=context
+        )
     elif tls:
         context.load_cert_chain(certfile=cert, keyfile=key)
         return elasticsearch.Elasticsearch(sys.argv[2:], ssl_context=context)
diff --git a/setup-scripts/blocklist-airtable.py b/setup-scripts/blocklist-airtable.py
index ba8df897..1d4c28ce 100755
--- a/setup-scripts/blocklist-airtable.py
+++ b/setup-scripts/blocklist-airtable.py
@@ -1,10 +1,16 @@
 #!/usr/bin/env python3
 
-import traceback, os, re, asyncio, requests, json
 from bot_utils import setup, send_msg
 from random import randint
 from time import sleep
 
+import traceback
+import os
+import re
+import asyncio
+import requests
+import json
+
 setup()
 
 AIRTABLE_API_KEY = os.getenv("AIRTABLE_API_KEY")
@@ -12,6 +18,7 @@ AIRTABLE_BASE = os.getenv("AIRTABLE_BASE", "app89plJvA9EqTJEc")
 AIRTABLE_TABLE = os.getenv("AIRTABLE_TABLE", "Table%201")
 AIRTABLE_FIELD = os.getenv("AIRTABLE_FIELD", "Link")
 
+
 async def run_checks():
     try:
         await block_skylinks_from_airtable()
@@ -31,10 +38,22 @@ async def block_skylinks_from_airtable():
     offset = None
     retry = 0
     while len(skylinks) == 0 or offset:
-        print("Requesting a batch of records from Airtable with " + (offset if offset else "empty") + " offset" + (" (retry " + str(retry) + ")" if retry else ""))
-        query = "&".join(["fields%5B%5D=" + AIRTABLE_FIELD, ("offset=" + offset) if offset else ""])
+        print(
+            "Requesting a batch of records from Airtable with "
+            + (offset if offset else "empty")
+            + " offset"
+            + (" (retry " + str(retry) + ")" if retry else "")
+        )
+        query = "&".join(
+            ["fields%5B%5D=" + AIRTABLE_FIELD, ("offset=" + offset) if offset else ""]
+        )
         response = requests.get(
-            "https://api.airtable.com/v0/" + AIRTABLE_BASE + "/" + AIRTABLE_TABLE + "?" + query,
+            "https://api.airtable.com/v0/"
+            + AIRTABLE_BASE
+            + "/"
+            + AIRTABLE_TABLE
+            + "?"
+            + query,
             headers=headers,
         )
 
@@ -44,40 +63,62 @@ async def block_skylinks_from_airtable():
         if response.status_code == 429:
             if retry < 100:
                 retry = retry + 1
-                sleep(randint(1,10))
+                sleep(randint(1, 10))
                 continue
             else:
-                return await send_msg("Airtable: too many retries, aborting!", force_notify=True)
+                return await send_msg(
+                    "Airtable: too many retries, aborting!", force_notify=True
+                )
         retry = 0  # reset retry counter
 
         if response.status_code != 200:
             status_code = str(response.status_code)
             response_text = response.text or "empty response"
-            message = "Airtable blocklist integration responded with code " + status_code + ": " + response_text
+            message = (
+                "Airtable blocklist integration responded with code "
+                + status_code
+                + ": "
+                + response_text
+            )
             return await send_msg(message, force_notify=False)
 
         data = response.json()
 
         if len(data["records"]) == 0:
-            return print("Airtable returned 0 records - make sure your configuration is correct")
+            return print(
+                "Airtable returned 0 records - make sure your configuration is correct"
+            )
 
-        skylinks = skylinks + [entry["fields"].get(AIRTABLE_FIELD, "") for entry in data["records"]]
-        skylinks = [skylink for skylink in skylinks if skylink]  # filter empty skylinks, most likely empty rows
+        skylinks = skylinks + [
+            entry["fields"].get(AIRTABLE_FIELD, "") for entry in data["records"]
+        ]
+        skylinks = [
+            skylink for skylink in skylinks if skylink
+        ]  # filter empty skylinks, most likely empty rows
 
         offset = data.get("offset")
 
     print("Airtable returned total " + str(len(skylinks)) + " skylinks to block")
 
     skylinks_returned = skylinks
-    skylinks = [skylink for skylink in skylinks if re.search("^[a-zA-Z0-9_-]{46}$", skylink)]
+    skylinks = [
+        skylink for skylink in skylinks if re.search("^[a-zA-Z0-9_-]{46}$", skylink)
+    ]
 
     if len(skylinks_returned) != len(skylinks):
-        invalid_skylinks = [str(skylink) for skylink in list(set(skylinks_returned) - set(skylinks))]
-        message = str(len(invalid_skylinks)) + " of the skylinks returned from Airtable are not valid"
+        invalid_skylinks = [
+            str(skylink) for skylink in list(set(skylinks_returned) - set(skylinks))
+        ]
+        message = (
+            str(len(invalid_skylinks))
+            + " of the skylinks returned from Airtable are not valid"
+        )
         await send_msg(message, file=("\n".join(invalid_skylinks)))
 
     apipassword = exec("docker exec sia cat /sia-data/apipassword")
-    ipaddress = exec("docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' sia")
+    ipaddress = exec(
+        "docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' sia"
+    )
 
     print("Sending blocklist request to siad")
     response = requests.post(
@@ -92,7 +133,12 @@ async def block_skylinks_from_airtable():
     else:
         status_code = str(response.status_code)
         response_text = response.text or "empty response"
-        message = "Siad blocklist endpoint responded with code " + status_code + ": " + response_text
+        message = (
+            "Siad blocklist endpoint responded with code "
+            + status_code
+            + ": "
+            + response_text
+        )
         return await send_msg(message, force_notify=False)
 
     print("Searching nginx cache for blocked files")
     batch_size = 1000
     cached_files_count = 0
     for i in range(0, len(skylinks), batch_size):
         cached_files_command = (
             "find /data/nginx/cache/ -type f | xargs -r grep -Els '^Skynet-Skylink: ("
-            + "|".join(skylinks[i:i+batch_size])
+            + "|".join(skylinks[i : i + batch_size])
             + ")'"
         )
-        cached_files_count+= int(exec('docker exec nginx bash -c "' + cached_files_command + ' | xargs -r rm -v | wc -l"'))
+        cached_files_count += int(
+            exec(
+                'docker exec nginx bash -c "'
+                + cached_files_command
+                + ' | xargs -r rm -v | wc -l"'
+            )
+        )
 
     if cached_files_count == 0:
         return print("No nginx cached files matching blocked skylinks were found")
 
-    message = "Purged " + str(cached_files_count) + " blocklisted files from nginx cache"
+    message = (
+        "Purged " + str(cached_files_count) + " blocklisted files from nginx cache"
+    )
     return await send_msg(message)
 
diff --git a/setup-scripts/bot_utils.py b/setup-scripts/bot_utils.py
index a015400f..69b6c98f 100644
--- a/setup-scripts/bot_utils.py
+++ b/setup-scripts/bot_utils.py
@@ -1,12 +1,19 @@
 #!/usr/bin/env python3
 
-from urllib.request import urlopen, Request
 from dotenv import load_dotenv
 from pathlib import Path
 from datetime import datetime
 from discord_webhook import DiscordWebhook
-import urllib, json, os, traceback, sys, re, subprocess, requests, io
+import urllib
+import json
+import os
+import traceback
+import sys
+import re
+import subprocess
+import requests
+import io
 
 # Load dotenv file if possible.
 # TODO: change all scripts to use named flags/params
@@ -25,6 +32,7 @@ sc_precision = 10 ** 24
 # Environment variable globals
 setup_done = False
 
+
 # find out local siad ip by inspecting its docker container
 def get_docker_container_ip(container_name):
     ip_regex = re.compile(r"^\d{1,3}\.\d{1,3}\.\d{1,3}\.\d{1,3}$")
@@ -41,6 +49,7 @@ api_endpoint = "http://{}:{}".format(
     get_docker_container_ip(CONTAINER_NAME), os.getenv("API_PORT", "9980")
 )
 
+
# find siad api password by getting it out of the docker container
 def get_api_password():
     api_password_regex = re.compile(r"^\w+$")
@@ -56,7 +65,8 @@ def setup():
     setup_done = True
 
 
-# send_msg sends the msg to the specified discord channel. If force_notify is set to true it adds "@here".
+# send_msg sends the msg to the specified discord channel.
+# If force_notify is set to true it adds "@here".
 async def send_msg(msg, force_notify=False, file=None):
     try:
         webhook_url = os.getenv("DISCORD_WEBHOOK_URL")
diff --git a/setup-scripts/funds-checker.py b/setup-scripts/funds-checker.py
index ebe1ece3..fa98e55b 100755
--- a/setup-scripts/funds-checker.py
+++ b/setup-scripts/funds-checker.py
@@ -5,9 +5,11 @@
 funds-checker runs simple checks on a portal node using the siad API and
 dispatches messages to a Discord channel.
""" -import traceback, asyncio, os from bot_utils import setup, send_msg, siad, sc_precision +import traceback +import asyncio + setup() diff --git a/setup-scripts/health-checker.py b/setup-scripts/health-checker.py index e88460df..7c5e9d94 100755 --- a/setup-scripts/health-checker.py +++ b/setup-scripts/health-checker.py @@ -105,7 +105,7 @@ async def check_disk(): ) inspect = os.popen("docker inspect sia").read().strip() inspect_json = json.loads(inspect) - if inspect_json[0]["State"]["Running"] == True: + if inspect_json[0]["State"]["Running"] is True: # mark portal as unhealthy os.popen("docker exec health-check cli/disable") time.sleep(300) # wait 5 minutes to propagate dns changes @@ -133,7 +133,10 @@ async def check_health(): res = requests.get(endpoint + "/health-check", verify=False) json_check = res.json() - server_failure = res.status_code is not requests.codes["ok"] and json_check["disabled"] == False: + server_failure = ( + res.status_code is not requests.codes["ok"] + and json_check["disabled"] is False + ) res = requests.get(endpoint + "/health-check/critical", verify=False) json_critical = res.json() @@ -171,12 +174,12 @@ async def check_health(): bad = False for check in critical["checks"]: critical_checks_total += 1 - if check["up"] == False: + if check["up"] is False: critical_checks_failed += 1 bad = True if bad: critical["checks"] = [ - check for check in critical["checks"] if check["up"] == False + check for check in critical["checks"] if check["up"] is False ] failed_records.append(critical) @@ -187,12 +190,12 @@ async def check_health(): bad = False for check in extended["checks"]: extended_checks_total += 1 - if check["up"] == False: + if check["up"] is False: extended_checks_failed += 1 bad = True if bad: extended["checks"] = [ - check for check in extended["checks"] if check["up"] == False + check for check in extended["checks"] if check["up"] is False ] failed_records.append(extended) @@ -227,11 +230,7 @@ async def check_health(): failed_records_file = json.dumps(failed_records, indent=2) # send a message if we force notification, there is a failures dump or just once daily (heartbeat) on 1 AM - if ( - force_notify - or failed_records_file - or datetime.utcnow().hour == 1 - ): + if force_notify or failed_records_file or datetime.utcnow().hour == 1: return await send_msg( message, file=failed_records_file, force_notify=force_notify ) diff --git a/setup-scripts/log-checker.py b/setup-scripts/log-checker.py index e5f21f4e..90c242df 100755 --- a/setup-scripts/log-checker.py +++ b/setup-scripts/log-checker.py @@ -1,9 +1,12 @@ #!/usr/bin/env python3 -import sys, traceback, io, os, asyncio -from bot_utils import setup, send_msg, upload_to_skynet +from bot_utils import setup, send_msg from subprocess import Popen, PIPE +import sys +import traceback +import asyncio + """ log-checker checks the docker logs for siad.