I wanted to check how the behaviour when logstash server was down and a problem occurred (I am using gevent worker with gunicorn)
2023-01-05 22:03:23,859 - [9/140512473585056] - E - LogProcessingWorker - An error occurred while sending events: HTTPSConnectionPool(host='xxx.xxx.com', port=5960): Max retries exceeded with url: / (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7fcb8e71e920>, 'Connection to xxx.xxx.com timed out. (connect timeout=5.0)'))
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/logstash_async/worker.py", line 118, in _fetch_events
self._fetch_event()
File "/usr/local/lib/python3.10/site-packages/logstash_async/worker.py", line 140, in _fetch_event
self._event = self._queue.get(block=False)
File "/usr/local/lib/python3.10/queue.py", line 168, in get
raise Empty
_queue.Empty
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/urllib3/connection.py", line 174, in _new_conn
conn = connection.create_connection(
File "/usr/local/lib/python3.10/site-packages/urllib3/util/connection.py", line 95, in create_connection
raise err
File "/usr/local/lib/python3.10/site-packages/urllib3/util/connection.py", line 85, in create_connection
sock.connect(sa)
File "/usr/local/lib/python3.10/site-packages/gevent/_socketcommon.py", line 613, in connect
self._wait(self._write_event)
File "src/gevent/_hub_primitives.py", line 317, in gevent._gevent_c_hub_primitives.wait_on_socket
File "src/gevent/_hub_primitives.py", line 322, in gevent._gevent_c_hub_primitives.wait_on_socket
File "src/gevent/_hub_primitives.py", line 313, in gevent._gevent_c_hub_primitives._primitive_wait
File "src/gevent/_hub_primitives.py", line 314, in gevent._gevent_c_hub_primitives._primitive_wait
File "src/gevent/_hub_primitives.py", line 46, in gevent._gevent_c_hub_primitives.WaitOperationsGreenlet.wait
File "src/gevent/_hub_primitives.py", line 46, in gevent._gevent_c_hub_primitives.WaitOperationsGreenlet.wait
File "src/gevent/_hub_primitives.py", line 55, in gevent._gevent_c_hub_primitives.WaitOperationsGreenlet.wait
File "src/gevent/_waiter.py", line 154, in gevent._gevent_c_waiter.Waiter.get
File "src/gevent/_greenlet_primitives.py", line 61, in gevent._gevent_c_greenlet_primitives.SwitchOutGreenletWithLoop.switch
File "src/gevent/_greenlet_primitives.py", line 61, in gevent._gevent_c_greenlet_primitives.SwitchOutGreenletWithLoop.switch
File "src/gevent/_greenlet_primitives.py", line 65, in gevent._gevent_c_greenlet_primitives.SwitchOutGreenletWithLoop.switch
File "src/gevent/_gevent_c_greenlet_primitives.pxd", line 35, in gevent._gevent_c_greenlet_primitives._greenlet_switch
TimeoutError: timed out
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py", line 703, in urlopen
httplib_response = self._make_request(
File "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py", line 386, in _make_request
self._validate_conn(conn)
File "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py", line 1042, in _validate_conn
conn.connect()
File "/usr/local/lib/python3.10/site-packages/urllib3/connection.py", line 358, in connect
self.sock = conn = self._new_conn()
File "/usr/local/lib/python3.10/site-packages/urllib3/connection.py", line 179, in _new_conn
raise ConnectTimeoutError(
urllib3.exceptions.ConnectTimeoutError: (<urllib3.connection.HTTPSConnection object at 0x7fcb8e71e920>, 'Connection to xxx.xxx.com timed out. (connect timeout=5.0)')
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/requests/adapters.py", line 489, in send
resp = conn.urlopen(
File "/usr/local/lib/python3.10/site-packages/urllib3/connectionpool.py", line 787, in urlopen
retries = retries.increment(
File "/usr/local/lib/python3.10/site-packages/urllib3/util/retry.py", line 592, in increment
raise MaxRetryError(_pool, url, error or ResponseError(cause))
urllib3.exceptions.MaxRetryError: HTTPSConnectionPool(host='xxx.xxx.com', port=5960): Max retries exceeded with url: / (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7fcb8e71e920>, 'Connection to xxx.xxx.com timed out. (connect timeout=5.0)'))
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "/usr/local/lib/python3.10/site-packages/logstash_async/worker.py", line 221, in _flush_queued_events
self._send_events(events)
File "/usr/local/lib/python3.10/site-packages/logstash_async/worker.py", line 284, in _send_events
self._transport.send(events, use_logging=use_logging)
File "/usr/local/lib/python3.10/site-packages/logstash_async/transport.py", line 364, in send
response = self.__session.post(
File "/usr/local/lib/python3.10/site-packages/requests/sessions.py", line 635, in post
return self.request("POST", url, data=data, json=json, **kwargs)
File "/usr/local/lib/python3.10/site-packages/requests/sessions.py", line 587, in request
resp = self.send(prep, **send_kwargs)
File "/usr/local/lib/python3.10/site-packages/requests/sessions.py", line 701, in send
r = adapter.send(request, **kwargs)
File "/usr/local/lib/python3.10/site-packages/requests/adapters.py", line 553, in send
raise ConnectTimeout(e, request=request)
requests.exceptions.ConnectTimeout: HTTPSConnectionPool(host='xxx.xxx.com', port=5960): Max retries exceeded with url: / (Caused by ConnectTimeoutError(<urllib3.connection.HTTPSConnection object at 0x7fcb8e71e920>, 'Connection to xxx.xxx.com timed out. (connect timeout=5.0)'))
gunicorn config.wsgi:application --bind 0.0.0.0:8000 \
--worker-class=gevent \
--worker-connections=100 \
--workers=4 \
--timeout=60 \
--graceful-timeout=30 \
--max-requests=5000 \
--max-requests-jitter=250 \
ELK_ENABLED = env.bool('ELK_ENABLED', default=False)
if ELK_ENABLED:
LOGSTASH_LOGGING_HANDLER_NAME = 'logstash'
LOGGING_LOGGING_FORMATTER_NAME = 'logstash'
from logstash_async.constants import constants
constants.QUEUED_EVENTS_FLUSH_INTERVAL = 15
constants.QUEUED_EVENTS_FLUSH_COUNT = 500
LOGGING['formatters'].update(**{
LOGGING_LOGGING_FORMATTER_NAME: {
'()': 'logstash_async.formatter.DjangoLogstashFormatter',
'message_type': env.str('ELK_MESSAGE_TYPE', default='Django'),
'fqdn': False, # Fully qualified domain name. Default value: false.
'extra_prefix': 'extra',
},
})
LOGGING['handlers'].update(**{
LOGSTASH_LOGGING_HANDLER_NAME: {
'level': env.str('ELK_LOGSTASH_LOGLEVEL', default='INFO'),
'use_logging': True,
'class': 'logstash_async.handler.AsynchronousLogstashHandler',
'event_ttl': 5 * 60, # 5 minutes
'formatter': LOGGING_LOGGING_FORMATTER_NAME,
'transport': 'logstash_async.transport.HttpTransport',
'host': env.str('ELK_LOGSTASH_HOST', default='localhost'),
'port': env.int('ELK_LOGSTASH_PORT', default=5959),
'username': env.str('ELK_LOGSTASH_USERNAME', default=None),
'password': env.str('ELK_LOGSTASH_PASSWORD', default=None),
'max_content_length': 10 * 1024 * 1024,
'ssl_enable': True,
'ssl_verify': True,
'ca_certs': None,
'certfile': None,
'keyfile': None,
'database_path': f'{ROOT_DIR}/logstash.db',
},
})
When the server is up again, logs are not sent (I suppose the reason is because of the above crash). FYI Other services (celery and other manage.py commands) work just fine.