diff --git a/setup-scripts/blocklist-airtable.py b/setup-scripts/blocklist-airtable.py index 8a2c61cd..1420439c 100755 --- a/setup-scripts/blocklist-airtable.py +++ b/setup-scripts/blocklist-airtable.py @@ -2,6 +2,8 @@ import traceback, os, re, asyncio, requests, json, discord from bot_utils import setup, send_msg +from random import randint +from time import sleep setup() @@ -27,14 +29,27 @@ async def block_skylinks_from_airtable(): headers = {"Authorization": "Bearer " + AIRTABLE_API_KEY} skylinks = [] offset = None + retry = 0 while len(skylinks) == 0 or offset: - print("Requesting a batch of records from Airtable with " + (offset if offset else "empty") + " offset") + print("Requesting a batch of records from Airtable with " + (offset if offset else "empty") + " offset" + (" (retry " + str(retry) + ")" if retry else "")) query = "&".join(["fields%5B%5D=" + AIRTABLE_FIELD, ("offset=" + offset) if offset else ""]) response = requests.get( "https://api.airtable.com/v0/" + AIRTABLE_BASE + "/" + AIRTABLE_TABLE + "?" 
+ query, headers=headers, ) + # rate limited - sleep for 1-10 secs and retry (up to 100 times, ~10 minutes) + # https://support.airtable.com/hc/en-us/articles/203313985-Public-REST-API + # > 5 requests per second, per base + if response.status_code == 429: + if retry < 100: + retry = retry + 1 + sleep(randint(1,10)) + continue + else: + return await send_msg("Airtable: too many retries, aborting!", force_notify=True) + retry = 0 # reset retry counter + if response.status_code != 200: status_code = str(response.status_code) response_text = response.text or "empty response" @@ -84,7 +99,7 @@ async def block_skylinks_from_airtable(): cached_files_count = 0 for i in range(0, len(skylinks), 1000): cached_files_command = ( - "/usr/bin/find /data/nginx/cache/ -type f | /usr/bin/xargs --no-run-if-empty -n1000 /bin/grep -Els '^KEY: .*(" + "find /data/nginx/cache/ -type f | xargs --no-run-if-empty -n1000 grep -Els '^Skynet-Skylink: .*(" + "|".join(skylinks[i:i+1000]) + ")'" )