Merge pull request #703 from SkynetLabs/ivo/jaeger

Add skyd tracing with Jaeger
Ivaylo Novakov 2021-05-10 09:22:31 +02:00 committed by GitHub
commit 019f2133f3
3 changed files with 204 additions and 1 deletion

docker-compose.jaeger.yml (new file)

@@ -0,0 +1,86 @@
version: '3.7'

services:
  sia:
    environment:
      - JAEGER_DISABLED=${JAEGER_DISABLED:-true} # Enable/Disable tracing
      - JAEGER_SERVICE_NAME=${PORTAL_NAME:-Skyd} # change to e.g. eu-ger-1
      # Configuration
      # See https://github.com/jaegertracing/jaeger-client-go#environment-variables
      # for all options.
      - JAEGER_SAMPLER_TYPE=probabilistic
      - JAEGER_SAMPLER_PARAM=0.1
      - JAEGER_AGENT_HOST=jaeger-agent
      - JAEGER_AGENT_PORT=6831
      - JAEGER_REPORTER_LOG_SPANS=false
    depends_on:
      - jaeger-agent

  jaeger-agent:
    image: jaegertracing/jaeger-agent
    command: [ "--reporter.grpc.host-port=jaeger-collector:14250", "--reporter.grpc.retry.max=1000" ]
    container_name: jaeger-agent
    restart: on-failure
    expose:
      - 6831
      - 6832
      - 5778
    environment:
      - LOG_LEVEL=debug
    networks:
      shared:
        ipv4_address: 10.10.10.90
    depends_on:
      - jaeger-collector

  jaeger-collector:
    image: jaegertracing/jaeger-collector
    command: [ "--es.num-shards=1", "--es.num-replicas=0", "--es.server-urls=http://elasticsearch:9200" ]
    container_name: jaeger-collector
    restart: on-failure
    expose:
      - 14269
      - 14268
      - 14250
    environment:
      - SPAN_STORAGE_TYPE=elasticsearch
      - LOG_LEVEL=debug
    networks:
      shared:
        ipv4_address: 10.10.10.91
    depends_on:
      - elasticsearch

  jaeger-query:
    image: jaegertracing/jaeger-query
    command: [ "--es.num-shards=1", "--es.num-replicas=0", "--es.server-urls=http://elasticsearch:9200" ]
    container_name: jaeger-query
    restart: on-failure
    ports:
      - "127.0.0.1:16686:16686"
    expose:
      - 16687
    environment:
      - SPAN_STORAGE_TYPE=elasticsearch
      - LOG_LEVEL=debug
    networks:
      shared:
        ipv4_address: 10.10.10.92
    depends_on:
      - elasticsearch

  elasticsearch:
    image: docker.elastic.co/elasticsearch/elasticsearch-oss:6.8.15
    container_name: elasticsearch
    restart: on-failure
    environment:
      - discovery.type=single-node
    volumes:
      # This dir needs to be chowned to 1000:1000
      - ./docker/data/elasticsearch/data:/usr/share/elasticsearch/data
    ports:
      # We need to expose this port, so we can prune the indexes.
      - "127.0.0.1:9200:9200"
    networks:
      shared:
        ipv4_address: 10.10.10.93
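Tracing is off by default (JAEGER_DISABLED defaults to true) and the Elasticsearch data directory has to be owned by UID/GID 1000 before the stack starts. A minimal bring-up sketch, assuming this file is used as an overlay on top of the portal's main docker-compose.yml (the base file name and the env handling are assumptions, not part of this diff):

# Prepare the Elasticsearch data dir; it must be chowned to 1000:1000 (see the volume comment above).
mkdir -p ./docker/data/elasticsearch/data
sudo chown -R 1000:1000 ./docker/data/elasticsearch/data

# Enable tracing and name the portal (eu-ger-1 is the hypothetical example from the comment above).
export JAEGER_DISABLED=false
export PORTAL_NAME=eu-ger-1

# Start the base stack together with the Jaeger overlay.
docker-compose -f docker-compose.yml -f docker-compose.jaeger.yml up -d

The Jaeger UI is then reachable on 127.0.0.1:16686 via the jaeger-query port mapping above; since it is bound to localhost, it is typically accessed through an SSH tunnel.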

scripts/es_cleaner.py (new file)

@@ -0,0 +1,117 @@
#!/usr/bin/env python3

import curator
import elasticsearch
import os
import ssl
import sys

TIMEOUT = 120


def main():
    if len(sys.argv) != 3:
        print('USAGE: [INDEX_PREFIX=(default "")] [ARCHIVE=(default false)] ... {} NUM_OF_DAYS http://HOSTNAME[:PORT]'.format(sys.argv[0]))
        print('NUM_OF_DAYS ... delete indices that are older than the given number of days.')
        print('HOSTNAME ... specifies which Elasticsearch hosts URL to search and delete indices from.')
        print('TIMEOUT ... number of seconds to wait for master node response (default {}).'.format(TIMEOUT))
        print('INDEX_PREFIX ... specifies index prefix.')
        print('INDEX_DATE_SEPARATOR ... specifies index date separator.')
        print('ARCHIVE ... specifies whether to remove archive indices (only works for rollover) (default false).')
        print('ROLLOVER ... specifies whether to remove indices created by rollover (default false).')
        print('ES_USERNAME ... The username required by Elasticsearch.')
        print('ES_PASSWORD ... The password required by Elasticsearch.')
        print('ES_TLS ... enable TLS (default false).')
        print('ES_TLS_CA ... Path to TLS CA file.')
        print('ES_TLS_CERT ... Path to TLS certificate file.')
        print('ES_TLS_KEY ... Path to TLS key file.')
        print('ES_TLS_SKIP_HOST_VERIFY ... (insecure) Skip server\'s certificate chain and host name verification.')
        sys.exit(1)

    client = create_client(os.getenv("ES_USERNAME"), os.getenv("ES_PASSWORD"), str2bool(os.getenv("ES_TLS", 'false')), os.getenv("ES_TLS_CA"), os.getenv("ES_TLS_CERT"), os.getenv("ES_TLS_KEY"), str2bool(os.getenv("ES_TLS_SKIP_HOST_VERIFY", 'false')))
    ilo = curator.IndexList(client)
    empty_list(ilo, 'Elasticsearch has no indices')

    prefix = os.getenv("INDEX_PREFIX", '')
    if prefix != '':
        prefix += '-'
    separator = os.getenv("INDEX_DATE_SEPARATOR", '-')

    if str2bool(os.getenv("ARCHIVE", 'false')):
        filter_archive_indices_rollover(ilo, prefix)
    else:
        if str2bool(os.getenv("ROLLOVER", 'false')):
            filter_main_indices_rollover(ilo, prefix)
        else:
            filter_main_indices(ilo, prefix, separator)

    empty_list(ilo, 'No indices to delete')

    for index in ilo.working_list():
        print("Removing", index)

    timeout = int(os.getenv("TIMEOUT", TIMEOUT))
    delete_indices = curator.DeleteIndices(ilo, master_timeout=timeout)
    delete_indices.do_action()


def filter_main_indices(ilo, prefix, separator):
    date_regex = r"\d{4}" + separator + r"\d{2}" + separator + r"\d{2}"
    time_string = "%Y" + separator + "%m" + separator + "%d"

    ilo.filter_by_regex(kind='regex', value=prefix + "jaeger-(span|service|dependencies)-" + date_regex)
    empty_list(ilo, "No indices to delete")
    # This excludes archive index as we use source='name'
    # source `creation_date` would include archive index
    ilo.filter_by_age(source='name', direction='older', timestring=time_string, unit='days', unit_count=int(sys.argv[1]))


def filter_main_indices_rollover(ilo, prefix):
    ilo.filter_by_regex(kind='regex', value=prefix + r"jaeger-(span|service)-\d{6}")
    empty_list(ilo, "No indices to delete")
    # do not remove active write indices
    ilo.filter_by_alias(aliases=[prefix + 'jaeger-span-write'], exclude=True)
    empty_list(ilo, "No indices to delete")
    ilo.filter_by_alias(aliases=[prefix + 'jaeger-service-write'], exclude=True)
    empty_list(ilo, "No indices to delete")
    ilo.filter_by_age(source='creation_date', direction='older', unit='days', unit_count=int(sys.argv[1]))


def filter_archive_indices_rollover(ilo, prefix):
    # Remove only rollover archive indices
    # Do not remove active write archive index
    ilo.filter_by_regex(kind='regex', value=prefix + r"jaeger-span-archive-\d{6}")
    empty_list(ilo, "No indices to delete")
    ilo.filter_by_alias(aliases=[prefix + 'jaeger-span-archive-write'], exclude=True)
    empty_list(ilo, "No indices to delete")
    ilo.filter_by_age(source='creation_date', direction='older', unit='days', unit_count=int(sys.argv[1]))


def empty_list(ilo, error_msg):
    try:
        ilo.empty_list_check()
    except curator.NoIndices:
        print(error_msg)
        sys.exit(0)


def str2bool(v):
    return v.lower() in ('true', '1')


def create_client(username, password, tls, ca, cert, key, skipHostVerify):
    context = ssl.create_default_context()
    if ca is not None:
        context = ssl.create_default_context(ssl.Purpose.SERVER_AUTH, cafile=ca)
    elif skipHostVerify:
        context.check_hostname = False
        context.verify_mode = ssl.CERT_NONE

    if username is not None and password is not None:
        return elasticsearch.Elasticsearch(sys.argv[2:], http_auth=(username, password), ssl_context=context)
    elif tls:
        context.load_cert_chain(certfile=cert, keyfile=key)
        return elasticsearch.Elasticsearch(sys.argv[2:], ssl_context=context)
    else:
        return elasticsearch.Elasticsearch(sys.argv[2:], ssl_context=context)


if __name__ == "__main__":
    main()
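The cleaner takes two positional arguments, a retention period in days and the Elasticsearch URL, and reads everything else from environment variables. A usage sketch against the locally exposed port from the compose file above; the 7-day retention, the repo path, and the cron schedule are illustrative, not part of this PR:

# Delete Jaeger span/service/dependencies indices older than 7 days.
python3 /home/user/skynet-webportal/scripts/es_cleaner.py 7 http://127.0.0.1:9200

# Example crontab entry (schedule assumed): prune indices daily at 01:00.
# 0 1 * * * /usr/bin/python3 /home/user/skynet-webportal/scripts/es_cleaner.py 7 http://127.0.0.1:9200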

@@ -5,7 +5,7 @@ set -e # exit on first error
 sudo apt-get update
 sudo apt-get -y install python3-pip
-pip3 install discord.py python-dotenv requests
+pip3 install discord.py python-dotenv requests elasticsearch-curator
 # add cron entries to user crontab
 crontab -u user /home/user/skynet-webportal/setup-scripts/support/crontab
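The elasticsearch-curator package added to the pip3 line above provides both the curator and elasticsearch modules that scripts/es_cleaner.py imports. A quick sanity check after the setup script has run (illustrative command, not part of this diff):

# Confirm the cleaner's Python dependencies are importable.
python3 -c "import curator, elasticsearch; print('es_cleaner.py dependencies OK')"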