Merge pull request #1869 from SkynetLabs/clean-up-nginx-skylink-cache-related-code

remove nginx skylink cache related code
This commit is contained in:
Christopher Schinnerl 2022-03-16 16:06:19 +01:00 committed by GitHub
commit e2110fdafc
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
14 changed files with 20 additions and 291 deletions

View File

@ -64,8 +64,6 @@ services:
logging: *default-logging logging: *default-logging
env_file: env_file:
- .env - .env
environment:
- SKYD_DISK_CACHE_ENABLED=${SKYD_DISK_CACHE_ENABLED:-true}
volumes: volumes:
- ./docker/data/nginx/cache:/data/nginx/cache - ./docker/data/nginx/cache:/data/nginx/cache
- ./docker/data/nginx/blocker:/data/nginx/blocker - ./docker/data/nginx/blocker:/data/nginx/blocker

View File

@ -1,6 +1,5 @@
include /etc/nginx/conf.d/include/cors; include /etc/nginx/conf.d/include/cors;
include /etc/nginx/conf.d/include/proxy-buffer; include /etc/nginx/conf.d/include/proxy-buffer;
include /etc/nginx/conf.d/include/proxy-cache-downloads;
include /etc/nginx/conf.d/include/track-download; include /etc/nginx/conf.d/include/track-download;
limit_conn downloads_by_ip 100; # ddos protection: max 100 downloads at a time limit_conn downloads_by_ip 100; # ddos protection: max 100 downloads at a time
@ -9,62 +8,10 @@ limit_conn downloads_by_ip 100; # ddos protection: max 100 downloads at a time
# this is important because we want only one format in cache keys and logs # this is important because we want only one format in cache keys and logs
set_by_lua_block $skylink { return require("skynet.skylink").parse(ngx.var.skylink) } set_by_lua_block $skylink { return require("skynet.skylink").parse(ngx.var.skylink) }
# $skylink_v1 and $skylink_v2 variables default to the same value but in case the requested skylink was:
# a) skylink v1 - it would not matter, no additional logic is executed
# b) skylink v2 - in a lua block below we will resolve the skylink v2 into skylink v1 and update
# $skylink_v1 variable so then the proxy request to skyd can be cached in nginx (proxy_cache_key
# in proxy-cache-downloads includes $skylink_v1 as a part of the cache key)
set $skylink_v1 $skylink;
set $skylink_v2 $skylink;
# variable for Skynet-Proof header that we need to inject
# into a response if the request was for skylink v2
set $skynet_proof '';
# default download rate to unlimited # default download rate to unlimited
set $limit_rate 0; set $limit_rate 0;
access_by_lua_block { access_by_lua_block {
-- the block below only makes sense if we are using nginx cache
if not ngx.var.skyd_disk_cache_enabled then
local httpc = require("resty.http").new()
-- detect whether requested skylink is v2
local isBase32v2 = string.len(ngx.var.skylink) == 55 and string.sub(ngx.var.skylink, 0, 2) == "04"
local isBase64v2 = string.len(ngx.var.skylink) == 46 and string.sub(ngx.var.skylink, 0, 2) == "AQ"
if isBase32v2 or isBase64v2 then
-- 10.10.10.10 points to sia service (alias not available when using resty-http)
local res, err = httpc:request_uri("http://10.10.10.10:9980/skynet/resolve/" .. ngx.var.skylink_v2, {
headers = { ["User-Agent"] = "Sia-Agent" }
})
-- print error and exit with 500 or exit with response if status is not 200
if err or (res and res.status ~= ngx.HTTP_OK) then
ngx.status = (err and ngx.HTTP_INTERNAL_SERVER_ERROR) or res.status
ngx.header["content-type"] = "text/plain"
ngx.say(err or res.body)
return ngx.exit(ngx.status)
end
local json = require('cjson')
local resolve = json.decode(res.body)
ngx.var.skylink_v1 = resolve.skylink
ngx.var.skynet_proof = res.headers["Skynet-Proof"]
end
-- check if skylink v1 is present on blocklist (compare hashes)
if require("skynet.blocklist").is_blocked(ngx.var.skylink_v1) then
return require("skynet.blocklist").exit_illegal()
end
-- if skylink is found on nocache list then set internal nocache variable
-- to tell nginx that it should not try and cache this file (too large)
if ngx.shared.nocache:get(ngx.var.skylink_v1) then
ngx.var.nocache = "1"
end
end
if require("skynet.account").accounts_enabled() then if require("skynet.account").accounts_enabled() then
-- check if portal is in authenticated only mode -- check if portal is in authenticated only mode
if require("skynet.account").is_access_unauthorized() then if require("skynet.account").is_access_unauthorized() then
@ -84,36 +31,10 @@ access_by_lua_block {
end end
} }
header_filter_by_lua_block {
ngx.header["Skynet-Portal-Api"] = ngx.var.scheme .. "://" .. ngx.var.skynet_portal_domain
ngx.header["Skynet-Server-Api"] = ngx.var.scheme .. "://" .. ngx.var.skynet_server_domain
-- the block below only makes sense if we are using nginx cache
if not ngx.var.skyd_disk_cache_enabled then
-- not empty skynet_proof means this is a skylink v2 request
-- so we should replace the Skynet-Proof header with the one
-- we got from /skynet/resolve/ endpoint, otherwise we would
-- be serving cached empty v1 skylink Skynet-Proof header
if ngx.var.skynet_proof and ngx.var.skynet_proof ~= "" then
ngx.header["Skynet-Proof"] = ngx.var.skynet_proof
end
-- add skylink to nocache list if it exceeds 1GB (1e+9 bytes) threshold
-- (content length can be nil for already cached files - we can ignore them)
if ngx.header["Content-Length"] and tonumber(ngx.header["Content-Length"]) > 1e+9 then
ngx.shared.nocache:set(ngx.var.skylink_v1, ngx.header["Content-Length"])
end
end
}
limit_rate_after 512k; limit_rate_after 512k;
limit_rate $limit_rate; limit_rate $limit_rate;
proxy_read_timeout 600; proxy_read_timeout 600;
proxy_set_header User-Agent: Sia-Agent; proxy_set_header User-Agent: Sia-Agent;
# in case the requested skylink was v2 and we already resolved it to skylink v1, we are going to pass resolved proxy_pass http://sia:9980/skynet/skylink/$skylink$path$is_args$args;
# skylink v1 to skyd to save that extra skylink v2 lookup in skyd but in turn, in case skyd returns a redirect
# we need to rewrite the skylink v1 to skylink v2 in the location header with proxy_redirect
proxy_redirect $skylink_v1 $skylink_v2;
proxy_pass http://sia:9980/skynet/skylink/$skylink_v1$path$is_args$args;

View File

@ -1,21 +0,0 @@
proxy_cache skynet; # cache name
proxy_cache_key $skylink_v1$path$arg_format$arg_attachment$arg_start$arg_end$http_range; # unique cache key
proxy_cache_min_uses 3; # cache after 3 uses
proxy_cache_valid 200 206 307 308 48h; # keep 200, 206, 307 and 308 responses valid for up to 2 days
add_header X-Proxy-Cache $upstream_cache_status; # add response header to indicate cache hits and misses
# map skyd env variable value to "1" for true and "0" for false (expected by proxy_no_cache)
set_by_lua_block $skyd_disk_cache_enabled {
return os.getenv("SKYD_DISK_CACHE_ENABLED") == "true" and "1" or "0"
}
# bypass - this will bypass cache hit on request (status BYPASS)
# but still stores file in cache if cache conditions are met
proxy_cache_bypass $cookie_nocache $arg_nocache $skyd_disk_cache_enabled;
# no cache - this will ignore cache on request (status MISS)
# and does not store file in cache under no condition
set_if_empty $nocache "0";
# disable cache when nocache is set or skyd cache is enabled
proxy_no_cache $nocache $skyd_disk_cache_enabled;

View File

@ -1,9 +0,0 @@
server {
# local server - do not expose this port externally
listen 8000;
# secure traffic by limiting to only local networks
include /etc/nginx/conf.d/include/local-network-only;
include /etc/nginx/conf.d/server/server.local;
}

View File

@ -38,8 +38,6 @@ location / {
end end
ngx.var.skylink = require("skynet.skylink").parse(ngx.var.skylink) ngx.var.skylink = require("skynet.skylink").parse(ngx.var.skylink)
ngx.var.skylink_v1 = ngx.var.skylink
ngx.var.skylink_v2 = ngx.var.skylink
} }
include /etc/nginx/conf.d/include/location-skylink; include /etc/nginx/conf.d/include/location-skylink;

View File

@ -1,37 +0,0 @@
include /etc/nginx/conf.d/include/init-optional-variables;
location /skynet/blocklist {
client_max_body_size 10m; # increase max body size to account for large lists
client_body_buffer_size 10m; # force whole body to memory so we can read it
content_by_lua_block {
local httpc = require("resty.http").new()
ngx.req.read_body() -- ensure the post body data is read before using get_body_data
-- proxy blocklist update request
-- 10.10.10.10 points to sia service (alias not available when using resty-http)
local res, err = httpc:request_uri("http://10.10.10.10:9980/skynet/blocklist", {
method = "POST",
body = ngx.req.get_body_data(),
headers = {
["Content-Type"] = "application/x-www-form-urlencoded",
["Authorization"] = require("skynet.utils").authorization_header(),
["User-Agent"] = "Sia-Agent",
}
})
-- print error and exit with 500 or exit with response if status is not 204
if err or (res and res.status ~= ngx.HTTP_NO_CONTENT) then
ngx.status = (err and ngx.HTTP_INTERNAL_SERVER_ERROR) or res.status
ngx.header["content-type"] = "text/plain"
ngx.say(err or res.body)
return ngx.exit(ngx.status)
end
require("skynet.blocklist").reload()
ngx.status = ngx.HTTP_NO_CONTENT
return ngx.exit(ngx.status)
}
}

View File

@ -1,66 +0,0 @@
local _M = {}
function _M.reload()
local httpc = require("resty.http").new()
-- fetch blocklist records (all blocked skylink hashes)
-- 10.10.10.10 points to sia service (alias not available when using resty-http)
local res, err = httpc:request_uri("http://10.10.10.10:9980/skynet/blocklist", {
headers = {
["User-Agent"] = "Sia-Agent",
}
})
-- fail whole request in case this request failed, we want to make sure
-- the blocklist is pre cached before serving first skylink
if err or (res and res.status ~= ngx.HTTP_OK) then
ngx.log(ngx.ERR, "Failed skyd service request /skynet/blocklist: ", err or ("[HTTP " .. res.status .. "] " .. res.body))
ngx.status = (err and ngx.HTTP_INTERNAL_SERVER_ERROR) or res.status
ngx.header["content-type"] = "text/plain"
ngx.say(err or res.body)
return ngx.exit(ngx.status)
elseif res and res.status == ngx.HTTP_OK then
local json = require('cjson')
local data = json.decode(res.body)
-- mark all existing entries as expired
ngx.shared.blocklist:flush_all()
-- check if blocklist is table (it is null when empty)
if type(data.blocklist) == "table" then
-- set all cache entries one by one (resets expiration)
for i, hash in ipairs(data.blocklist) do
ngx.shared.blocklist:set(hash, true)
end
end
-- ensure that init flag is persisted
ngx.shared.blocklist:set("__init", true)
-- remove all leftover expired entries
ngx.shared.blocklist:flush_expired()
end
end
function _M.is_blocked(skylink)
-- make sure that blocklist has been preloaded
if not ngx.shared.blocklist:get("__init") then _M.reload() end
-- hash skylink before comparing it with blocklist
local hash = require("skynet.skylink").hash(skylink)
-- we need to use get_stale because we are expiring previous
-- entries when the blocklist is reloading and we still want
-- to block them until the reloading is finished
return ngx.shared.blocklist:get_stale(hash) == true
end
-- exit with 416 illegal content status code
function _M.exit_illegal()
ngx.status = ngx.HTTP_ILLEGAL
ngx.header["content-type"] = "text/plain"
ngx.say("Unavailable For Legal Reasons")
return ngx.exit(ngx.status)
end
return _M

View File

@ -31,7 +31,6 @@ env SERVER_DOMAIN;
env PORTAL_MODULES; env PORTAL_MODULES;
env ACCOUNTS_LIMIT_ACCESS; env ACCOUNTS_LIMIT_ACCESS;
env SIA_API_PASSWORD; env SIA_API_PASSWORD;
env SKYD_DISK_CACHE_ENABLED;
events { events {
worker_connections 8192; worker_connections 8192;
@ -75,20 +74,10 @@ http {
# proxy cache definition # proxy cache definition
proxy_cache_path /data/nginx/cache levels=1:2 keys_zone=skynet:10m max_size=50g min_free=100g inactive=48h use_temp_path=off; proxy_cache_path /data/nginx/cache levels=1:2 keys_zone=skynet:10m max_size=50g min_free=100g inactive=48h use_temp_path=off;
# create a shared blocklist dictionary with size of 30 megabytes
# estimated capacity of 1 megabyte dictionary is 3500 blocklist entries
# that gives us capacity of around 100k entries in 30 megabyte dictionary
lua_shared_dict blocklist 30m;
# create a shared dictionary to fill with skylinks that should not
# be cached due to the large size or some other reasons
lua_shared_dict nocache 10m;
# this runs before forking out nginx worker processes # this runs before forking out nginx worker processes
init_by_lua_block { init_by_lua_block {
require "cjson" require "cjson"
require "resty.http" require "resty.http"
require "skynet.blocklist"
require "skynet.skylink" require "skynet.skylink"
require "skynet.utils" require "skynet.utils"
} }

View File

@ -29,12 +29,6 @@ the health check.
The `portal-upgrade.sh` script upgrades the docker images for a portal and The `portal-upgrade.sh` script upgrades the docker images for a portal and
clears and leftover images. clears and leftover images.
**nginx-prune.sh**\
The `nginx-prune.sh` script deletes all entries from nginx cache larger than
the given size and smaller entries until nginx cache disk size is smaller than
the given cache size limit. Both values are configured in
`lib/nginx-prune-cache-subscript.sh`. The script doesn't require `sudo`.
## Webportal Upgrade Procedures ## Webportal Upgrade Procedures
TODO... TODO...

View File

@ -34,18 +34,16 @@ else
skylinks=("$1") # just single skylink passed as input argument skylinks=("$1") # just single skylink passed as input argument
fi fi
# get local nginx ip adress # get local skyd ip adress
nginx_ip=$(docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' nginx) ipaddress=$(docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' sia)
# get sia api password either from env variable if exists or from apipassword file in sia-data directory
apipassword=$(docker exec sia sh -c '[ ! -z "${SIA_API_PASSWORD}" ] && echo ${SIA_API_PASSWORD} || $(cat /sia-data/apipassword | tr -d '\n')')
# iterate over provided skylinks and block them one by one # iterate over provided skylinks and block them one by one
for skylink in "${skylinks[@]}"; do for skylink in "${skylinks[@]}"; do
printf "Blocking ${skylink} ... " echo "> Blocking ${skylink} ... "
status_code=$(curl --write-out '%{http_code}' --silent --output /dev/null --data "{\"add\":[\"$skylink\"]}" "http://${nginx_ip}:8000/skynet/blocklist")
# print blocklist response status code # POST /skynet/blocklist always returns 200 and in case of failure print error message
if [ $status_code = "204" ]; then curl -A Sia-Agent -u "":${apipassword} --data "{\"add\":[\"$skylink\"]}" "http://${ipaddress}:9980/skynet/blocklist"
echo "done"
else
echo "error $status_code"
fi
done done

View File

@ -1,30 +0,0 @@
#!/usr/local/bin/bash
# This subscript is expected to be run inside docker container using 'bash'
# image. The image is based on Alpine Linux. It's tools (find, stat, awk, sort)
# are non-standard versions from BusyBox.
MAX_CACHE_DIR_SIZE=20000000000
MAX_KEEP_FILE_SIZE=1000000000
total=0
# We sort files by time, newest files are first. Format is:
# time (last modification as seconds since Epoch), filepath, size (bytes)
find /home/user/skynet-webportal/docker/data/nginx/cache -type f -exec stat -c "%Y %n %s" {} + | sort -rgk1 | while read line
do
size=$(echo $line | awk '{print $3}')
new_total=$(($total + $size))
# We always delete all files larger than MAX_KEEP_FILE_SIZE.
# We keep all files smaller than MAX_KEEP_FILE_SIZE when cache size is
# below MAX_CACHE_DIR_SIZE, then we delete also smaller files.
if (("$size" <= "$MAX_KEEP_FILE_SIZE" && "$new_total" < "$MAX_CACHE_DIR_SIZE"))
then
total=$new_total
continue
fi
filename=$(echo $line | awk '{print $2}')
rm $filename
done

View File

@ -1,9 +0,0 @@
#!/bin/bash
# We execute the nginx cache pruning subscript from docker container so that we
# can run the pruning script in user crontab without sudo.
docker run --rm -v /home/user:/home/user bash /home/user/skynet-webportal/scripts/lib/nginx-prune-cache-subscript.sh
# Some cache files are deleted, but are kept open, we hot reload nginx to get
# them closed and removed from filesystem.
docker exec nginx nginx -s reload

View File

@ -1,6 +1,6 @@
#!/usr/bin/env python3 #!/usr/bin/env python3
from bot_utils import setup, send_msg from bot_utils import get_api_password, setup, send_msg
from random import randint from random import randint
from time import sleep from time import sleep
@ -11,6 +11,8 @@ import asyncio
import requests import requests
import json import json
from requests.auth import HTTPBasicAuth
setup() setup()
@ -38,14 +40,14 @@ def exec(command):
async def block_skylinks_from_airtable(): async def block_skylinks_from_airtable():
# Get nginx's IP before doing anything else. If this step fails we don't # Get sia IP before doing anything else. If this step fails we don't
# need to continue with the execution of the script. # need to continue with the execution of the script.
ipaddress = exec( ipaddress = exec(
"docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' nginx" "docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' sia"
) )
if ipaddress == "": if ipaddress == "":
print("Nginx's IP could not be detected. Exiting.") print("Skyd IP could not be detected. Exiting.")
return return
print("Pulling blocked skylinks from Airtable via api integration") print("Pulling blocked skylinks from Airtable via api integration")
@ -117,11 +119,13 @@ async def block_skylinks_from_airtable():
print( print(
"Sending /skynet/blocklist request with " "Sending /skynet/blocklist request with "
+ str(len(skylinks)) + str(len(skylinks))
+ " skylinks to siad through nginx" + " skylinks to siad"
) )
response = requests.post( response = requests.post(
"http://" + ipaddress + ":8000/skynet/blocklist", "http://" + ipaddress + ":9980/skynet/blocklist",
data=json.dumps({"add": skylinks}), data=json.dumps({"add": skylinks}),
headers={"User-Agent": "Sia-Agent"},
auth=HTTPBasicAuth("", get_api_password()),
) )
if response.status_code != 200: if response.status_code != 200:
@ -153,5 +157,5 @@ loop.run_until_complete(run_checks())
# --- BASH EQUIVALENT # --- BASH EQUIVALENT
# skylinks=$(curl "https://api.airtable.com/v0/${AIRTABLE_BASE}/${AIRTABLE_TABLE}?fields%5B%5D=${AIRTABLE_FIELD}" -H "Authorization: Bearer ${AIRTABLE_KEY}" | python3 -c "import sys, json; print('[\"' + '\",\"'.join([entry['fields']['Link'] for entry in json.load(sys.stdin)['records']]) + '\"]')") # skylinks=$(curl "https://api.airtable.com/v0/${AIRTABLE_BASE}/${AIRTABLE_TABLE}?fields%5B%5D=${AIRTABLE_FIELD}" -H "Authorization: Bearer ${AIRTABLE_KEY}" | python3 -c "import sys, json; print('[\"' + '\",\"'.join([entry['fields']['Link'] for entry in json.load(sys.stdin)['records']]) + '\"]')")
# ipaddress=$(docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' nginx) # ipaddress=$(docker inspect -f '{{range.NetworkSettings.Networks}}{{.IPAddress}}{{end}}' sia)
# curl --data "{\"add\" : ${skylinks}}" "${ipaddress}:8000/skynet/blocklist" # curl --data "{\"add\" : ${skylinks}}" "${ipaddress}:8000/skynet/blocklist"

View File

@ -5,4 +5,3 @@
44 5 * * * /home/user/skynet-webportal/scripts/backup-aws-s3.sh 1>>/home/user/skynet-webportal/logs/backup-aws-s3.log 2>>/home/user/skynet-webportal/logs/backup-aws-s3.log 44 5 * * * /home/user/skynet-webportal/scripts/backup-aws-s3.sh 1>>/home/user/skynet-webportal/logs/backup-aws-s3.log 2>>/home/user/skynet-webportal/logs/backup-aws-s3.log
6 13 * * * /home/user/skynet-webportal/scripts/db_backup.sh 1>>/home/user/skynet-webportal/logs/db_backup.log 2>>/home/user/skynet-webportal/logs/db_backup.log 6 13 * * * /home/user/skynet-webportal/scripts/db_backup.sh 1>>/home/user/skynet-webportal/logs/db_backup.log 2>>/home/user/skynet-webportal/logs/db_backup.log
0 5 * * * /home/user/skynet-webportal/scripts/es_cleaner.py 1 http://localhost:9200 0 5 * * * /home/user/skynet-webportal/scripts/es_cleaner.py 1 http://localhost:9200
15 * * * * /home/user/skynet-webportal/scripts/nginx-prune.sh