From 96ade5f36df77d9a449bc14cb4ecfb0771349d19 Mon Sep 17 00:00:00 2001 From: Karol Wypchlo Date: Sun, 18 Jul 2021 08:22:32 +0200 Subject: [PATCH 01/11] airtable retries logic --- setup-scripts/blocklist-airtable.py | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) diff --git a/setup-scripts/blocklist-airtable.py b/setup-scripts/blocklist-airtable.py index 8a2c61cd..1420439c 100755 --- a/setup-scripts/blocklist-airtable.py +++ b/setup-scripts/blocklist-airtable.py @@ -2,6 +2,8 @@ import traceback, os, re, asyncio, requests, json, discord from bot_utils import setup, send_msg +from random import randint +from time import sleep setup() @@ -27,14 +29,27 @@ async def block_skylinks_from_airtable(): headers = {"Authorization": "Bearer " + AIRTABLE_API_KEY} skylinks = [] offset = None + retry = 0 while len(skylinks) == 0 or offset: - print("Requesting a batch of records from Airtable with " + (offset if offset else "empty") + " offset") + print("Requesting a batch of records from Airtable with " + (offset if offset else "empty") + " offset" + (" (retry " + retry + ")" if retry else "")) query = "&".join(["fields%5B%5D=" + AIRTABLE_FIELD, ("offset=" + offset) if offset else ""]) response = requests.get( "https://api.airtable.com/v0/" + AIRTABLE_BASE + "/" + AIRTABLE_TABLE + "?" 
+ query, headers=headers, ) + # rate limited - sleep for 1-10 secs and retry (up to 100 times, ~10 minutes) + # https://support.airtable.com/hc/en-us/articles/203313985-Public-REST-API + # > 5 requests per second, per base + if response.status_code == 429: + if retry < 100: + retry = retry + 1 + sleep(randint(1,10)) + continue + else: + return await send_msg("Airtable: too many retries, aborting!", force_notify=True) + retry = 0 # reset retry counter + if response.status_code != 200: status_code = str(response.status_code) response_text = response.text or "empty response" @@ -84,7 +99,7 @@ async def block_skylinks_from_airtable(): cached_files_count = 0 for i in range(0, len(skylinks), 1000): cached_files_command = ( - "/usr/bin/find /data/nginx/cache/ -type f | /usr/bin/xargs --no-run-if-empty -n1000 /bin/grep -Els '^KEY: .*(" + "find /data/nginx/cache/ -type f | xargs --no-run-if-empty -n1000 grep -Els '^Skynet-Skylink: .*(" + "|".join(skylinks[i:i+1000]) + ")'" ) From fdee9b46eeea87eb3be5b478777f7121e76f8b3e Mon Sep 17 00:00:00 2001 From: Karol Wypchlo Date: Sun, 18 Jul 2021 08:55:29 +0200 Subject: [PATCH 02/11] concatenate string --- setup-scripts/blocklist-airtable.py | 2 +- setup-scripts/support/logrotate | 13 +++++++++++++ 2 files changed, 14 insertions(+), 1 deletion(-) create mode 100644 setup-scripts/support/logrotate diff --git a/setup-scripts/blocklist-airtable.py b/setup-scripts/blocklist-airtable.py index 1420439c..a7773cee 100755 --- a/setup-scripts/blocklist-airtable.py +++ b/setup-scripts/blocklist-airtable.py @@ -31,7 +31,7 @@ async def block_skylinks_from_airtable(): offset = None retry = 0 while len(skylinks) == 0 or offset: - print("Requesting a batch of records from Airtable with " + (offset if offset else "empty") + " offset" + (" (retry " + retry + ")" if retry else "")) + print("Requesting a batch of records from Airtable with " + (offset if offset else "empty") + " offset" + (" (retry " + str(retry) + ")" if retry else "")) query = 
"&".join(["fields%5B%5D=" + AIRTABLE_FIELD, ("offset=" + offset) if offset else ""]) response = requests.get( "https://api.airtable.com/v0/" + AIRTABLE_BASE + "/" + AIRTABLE_TABLE + "?" + query, diff --git a/setup-scripts/support/logrotate b/setup-scripts/support/logrotate new file mode 100644 index 00000000..81c7e40f --- /dev/null +++ b/setup-scripts/support/logrotate @@ -0,0 +1,13 @@ +/home/user/skynet-webportal/docker/data/nginx/logs/*.log { + hourly + dateext + rotate 3650 + missingok + notifempty + nocompress + create 640 root root + sharedscripts + postrotate + docker exec nginx sh -c '! test -f /usr/local/openresty/nginx/logs/nginx.pid' || docker exec nginx sh -c 'kill -USR1 $(cat /usr/local/openresty/nginx/logs/nginx.pid)' + endscript +} \ No newline at end of file From 273bcbcb8f4c4dce3cc29d2c68dee92fb137f23c Mon Sep 17 00:00:00 2001 From: Karol Wypchlo Date: Sun, 18 Jul 2021 09:07:15 +0200 Subject: [PATCH 03/11] fix cache busting --- setup-scripts/blocklist-airtable.py | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) diff --git a/setup-scripts/blocklist-airtable.py b/setup-scripts/blocklist-airtable.py index a7773cee..eb835955 100755 --- a/setup-scripts/blocklist-airtable.py +++ b/setup-scripts/blocklist-airtable.py @@ -96,20 +96,22 @@ async def block_skylinks_from_airtable(): return await send_msg(message, force_notify=False) print("Searching nginx cache for blocked files") - cached_files_count = 0 + total_cached_files_count = 0 for i in range(0, len(skylinks), 1000): cached_files_command = ( - "find /data/nginx/cache/ -type f | xargs --no-run-if-empty -n1000 grep -Els '^Skynet-Skylink: .*(" + "find /data/nginx/cache/ -type f | xargs --no-run-if-empty -n1000 grep -Els '^Skynet-Skylink: (" + "|".join(skylinks[i:i+1000]) + ")'" ) - cached_files_count += int(exec('docker exec -it nginx bash -c "' + cached_files_command + ' | wc -l"') or 0) + cached_files_count = int(exec('docker exec -it nginx bash -c "' + cached_files_command + ' | wc 
-l"') or 0) + if cached_files_count: + total_cached_files_count+= cached_files_count + exec('docker exec -it nginx bash -c "' + cached_files_command + ' | xargs rm"') - if cached_files_count == 0: + if total_cached_files_count == 0: return print("No nginx cached files matching blocked skylinks were found") - exec('docker exec -it nginx bash -c "' + cached_files_command + ' | xargs rm"') - message = "Purged " + str(cached_files_count) + " blocklisted files from nginx cache" + message = "Purged " + str(total_cached_files_count) + " blocklisted files from nginx cache" return await send_msg(message) From 52e5a345cf6a53e73c8179688edefd1c6187300c Mon Sep 17 00:00:00 2001 From: Karol Wypchlo Date: Sun, 18 Jul 2021 09:09:55 +0200 Subject: [PATCH 04/11] improve --- setup-scripts/blocklist-airtable.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/setup-scripts/blocklist-airtable.py b/setup-scripts/blocklist-airtable.py index eb835955..ff169a46 100755 --- a/setup-scripts/blocklist-airtable.py +++ b/setup-scripts/blocklist-airtable.py @@ -106,7 +106,8 @@ async def block_skylinks_from_airtable(): cached_files_count = int(exec('docker exec -it nginx bash -c "' + cached_files_command + ' | wc -l"') or 0) if cached_files_count: total_cached_files_count+= cached_files_count - exec('docker exec -it nginx bash -c "' + cached_files_command + ' | xargs rm"') + removed = int(exec('docker exec -it nginx bash -c "' + cached_files_command + ' | xargs -r rm -v | wc -l"')) + print(removed) if total_cached_files_count == 0: return print("No nginx cached files matching blocked skylinks were found") From 73cf5b510e233328cd1e6caf299690fb6e1c3c8f Mon Sep 17 00:00:00 2001 From: Karol Wypchlo Date: Sun, 18 Jul 2021 09:10:54 +0200 Subject: [PATCH 05/11] improve --- setup-scripts/blocklist-airtable.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/setup-scripts/blocklist-airtable.py b/setup-scripts/blocklist-airtable.py index ff169a46..5cb4d14e 100755 --- 
a/setup-scripts/blocklist-airtable.py +++ b/setup-scripts/blocklist-airtable.py @@ -104,10 +104,10 @@ async def block_skylinks_from_airtable(): + ")'" ) cached_files_count = int(exec('docker exec -it nginx bash -c "' + cached_files_command + ' | wc -l"') or 0) + removed = int(exec('docker exec -it nginx bash -c "' + cached_files_command + ' | xargs -r rm -v | wc -l"')) + print(removed) if cached_files_count: total_cached_files_count+= cached_files_count - removed = int(exec('docker exec -it nginx bash -c "' + cached_files_command + ' | xargs -r rm -v | wc -l"')) - print(removed) if total_cached_files_count == 0: return print("No nginx cached files matching blocked skylinks were found") From 7c9d1debd29d5bd4d005e15eff83b63dd18e197a Mon Sep 17 00:00:00 2001 From: Karol Wypchlo Date: Sun, 18 Jul 2021 09:13:30 +0200 Subject: [PATCH 06/11] one less search --- setup-scripts/blocklist-airtable.py | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) diff --git a/setup-scripts/blocklist-airtable.py b/setup-scripts/blocklist-airtable.py index 5cb4d14e..f26f1e73 100755 --- a/setup-scripts/blocklist-airtable.py +++ b/setup-scripts/blocklist-airtable.py @@ -96,23 +96,19 @@ async def block_skylinks_from_airtable(): return await send_msg(message, force_notify=False) print("Searching nginx cache for blocked files") - total_cached_files_count = 0 + cached_files_count = 0 for i in range(0, len(skylinks), 1000): cached_files_command = ( "find /data/nginx/cache/ -type f | xargs --no-run-if-empty -n1000 grep -Els '^Skynet-Skylink: (" + "|".join(skylinks[i:i+1000]) + ")'" ) - cached_files_count = int(exec('docker exec -it nginx bash -c "' + cached_files_command + ' | wc -l"') or 0) - removed = int(exec('docker exec -it nginx bash -c "' + cached_files_command + ' | xargs -r rm -v | wc -l"')) - print(removed) - if cached_files_count: - total_cached_files_count+= cached_files_count + cached_files_count+= int(exec('docker exec -it nginx bash -c "' + cached_files_command + ' | 
xargs -r rm -v | wc -l"')) - if total_cached_files_count == 0: + if cached_files_count == 0: return print("No nginx cached files matching blocked skylinks were found") - message = "Purged " + str(total_cached_files_count) + " blocklisted files from nginx cache" + message = "Purged " + str(cached_files_count) + " blocklisted files from nginx cache" return await send_msg(message) From 6c1b96606e697b2b0af114efe53752517017b357 Mon Sep 17 00:00:00 2001 From: Karol Wypchlo Date: Sun, 18 Jul 2021 09:17:29 +0200 Subject: [PATCH 07/11] one less search --- setup-scripts/blocklist-airtable.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/setup-scripts/blocklist-airtable.py b/setup-scripts/blocklist-airtable.py index f26f1e73..8b0d9705 100755 --- a/setup-scripts/blocklist-airtable.py +++ b/setup-scripts/blocklist-airtable.py @@ -103,7 +103,9 @@ async def block_skylinks_from_airtable(): + "|".join(skylinks[i:i+1000]) + ")'" ) - cached_files_count+= int(exec('docker exec -it nginx bash -c "' + cached_files_command + ' | xargs -r rm -v | wc -l"')) + output = exec('docker exec -it nginx bash -c "' + cached_files_command + ' | xargs -r rm -v | wc -l"') + print(output) + cached_files_count+= int(output) if cached_files_count == 0: return print("No nginx cached files matching blocked skylinks were found") From ed19736e2296a5c670f9d840633fbd43e7450fc8 Mon Sep 17 00:00:00 2001 From: Karol Wypchlo Date: Sun, 18 Jul 2021 09:24:45 +0200 Subject: [PATCH 08/11] smaller batch --- setup-scripts/blocklist-airtable.py | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/setup-scripts/blocklist-airtable.py b/setup-scripts/blocklist-airtable.py index 8b0d9705..379841eb 100755 --- a/setup-scripts/blocklist-airtable.py +++ b/setup-scripts/blocklist-airtable.py @@ -97,15 +97,14 @@ async def block_skylinks_from_airtable(): print("Searching nginx cache for blocked files") cached_files_count = 0 - for i in range(0, len(skylinks), 1000): + batch_size = 500 
+ for i in range(0, len(skylinks), batch_size): cached_files_command = ( - "find /data/nginx/cache/ -type f | xargs --no-run-if-empty -n1000 grep -Els '^Skynet-Skylink: (" - + "|".join(skylinks[i:i+1000]) + "find /data/nginx/cache/ -type f | xargs --no-run-if-empty -n" + batch_size + " grep -Els '^Skynet-Skylink: (" + + "|".join(skylinks[i:i+batch_size]) + ")'" ) - output = exec('docker exec -it nginx bash -c "' + cached_files_command + ' | xargs -r rm -v | wc -l"') - print(output) - cached_files_count+= int(output) + cached_files_count+= int(exec('docker exec -it nginx bash -c "' + cached_files_command + ' | xargs -r rm -v | wc -l"')) if cached_files_count == 0: return print("No nginx cached files matching blocked skylinks were found") From 7f448fb31fb826f64effd3d3db72b8efbdcf7c75 Mon Sep 17 00:00:00 2001 From: Karol Wypchlo Date: Sun, 18 Jul 2021 09:25:23 +0200 Subject: [PATCH 09/11] smaller batch fix --- setup-scripts/blocklist-airtable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup-scripts/blocklist-airtable.py b/setup-scripts/blocklist-airtable.py index 379841eb..18191969 100755 --- a/setup-scripts/blocklist-airtable.py +++ b/setup-scripts/blocklist-airtable.py @@ -100,7 +100,7 @@ async def block_skylinks_from_airtable(): batch_size = 500 for i in range(0, len(skylinks), batch_size): cached_files_command = ( - "find /data/nginx/cache/ -type f | xargs --no-run-if-empty -n" + batch_size + " grep -Els '^Skynet-Skylink: (" + "find /data/nginx/cache/ -type f | xargs --no-run-if-empty -n" + str(batch_size) + " grep -Els '^Skynet-Skylink: (" + "|".join(skylinks[i:i+batch_size]) + ")'" ) From ae6043817b6010b9bc33ee64753abedbd9d7f363 Mon Sep 17 00:00:00 2001 From: Karol Wypchlo Date: Sun, 18 Jul 2021 09:28:24 +0200 Subject: [PATCH 10/11] batch size 1k --- setup-scripts/blocklist-airtable.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup-scripts/blocklist-airtable.py b/setup-scripts/blocklist-airtable.py index 
18191969..de16f533 100755 --- a/setup-scripts/blocklist-airtable.py +++ b/setup-scripts/blocklist-airtable.py @@ -97,7 +97,7 @@ async def block_skylinks_from_airtable(): print("Searching nginx cache for blocked files") cached_files_count = 0 - batch_size = 500 + batch_size = 1000 for i in range(0, len(skylinks), batch_size): cached_files_command = ( "find /data/nginx/cache/ -type f | xargs --no-run-if-empty -n" + str(batch_size) + " grep -Els '^Skynet-Skylink: (" From 0e419d820c2dac238c36180b9ad5ce979fa675bc Mon Sep 17 00:00:00 2001 From: Karol Wypchlo Date: Mon, 19 Jul 2021 12:21:51 +0200 Subject: [PATCH 11/11] don't include logrotate, included by mistake --- setup-scripts/support/logrotate | 13 ------------- 1 file changed, 13 deletions(-) delete mode 100644 setup-scripts/support/logrotate diff --git a/setup-scripts/support/logrotate b/setup-scripts/support/logrotate deleted file mode 100644 index 81c7e40f..00000000 --- a/setup-scripts/support/logrotate +++ /dev/null @@ -1,13 +0,0 @@ -/home/user/skynet-webportal/docker/data/nginx/logs/*.log { - hourly - dateext - rotate 3650 - missingok - notifempty - nocompress - create 640 root root - sharedscripts - postrotate - docker exec nginx sh -c '! test -f /usr/local/openresty/nginx/logs/nginx.pid' || docker exec nginx sh -c 'kill -USR1 $(cat /usr/local/openresty/nginx/logs/nginx.pid)' - endscript -} \ No newline at end of file