diff --git a/CHANGELOG.md b/CHANGELOG.md index 947093812..990e42b9d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -58,6 +58,29 @@ https://nms.local/nodes/?name={{ obj.name }} Custom links appear as buttons at the top of the object view. Grouped links will render as a dropdown menu beneath a single button. +### Prometheus Metrics ([#3104](https://github.com/digitalocean/netbox/issues/3104)) + +NetBox now supports optionally exposing native Prometheus metrics from the application. [Prometheus](https://prometheus.io/) +is a popular time-series metrics platform used for monitoring. NetBox exposes metrics at the `/metrics` HTTP endpoint, e.g. +`https://netbox.local/metrics`. Metric exposition can be toggled with the `METRICS_ENABLED` configuration setting. Metrics +are exposed by default. + +NetBox makes use of the [django-prometheus](https://github.com/korfuri/django-prometheus) library to export a number of +different types of metrics, including: + +- Per-model insert, update, and delete counters +- Per-view request counters +- Per-view request latency histograms +- Request body size histograms +- Response body size histograms +- Response code counters +- Database connection, execution, and error counters +- Cache hit, miss, and invalidation counters +- Django middleware latency histograms +- Other Django-related metadata metrics + +For the exhaustive list of exposed metrics, visit the `/metrics` endpoint on your NetBox instance.
+ ## Changes ### New Dependency: Redis diff --git a/base_requirements.txt b/base_requirements.txt index 2a5265504..e331143ff 100644 --- a/base_requirements.txt +++ b/base_requirements.txt @@ -22,6 +22,10 @@ django-filter # https://github.com/django-mptt/django-mptt django-mptt +# Prometheus metrics library for Django +# https://github.com/korfuri/django-prometheus +django-prometheus + # Abstraction models for rendering and paginating HTML tables # https://github.com/jieter/django-tables2 django-tables2 diff --git a/docs/additional-features/prometheus-metrics.md b/docs/additional-features/prometheus-metrics.md new file mode 100644 index 000000000..a472dc7a9 --- /dev/null +++ b/docs/additional-features/prometheus-metrics.md @@ -0,0 +1,22 @@ +# Prometheus Metrics + +NetBox supports optionally exposing native Prometheus metrics from the application. [Prometheus](https://prometheus.io/) is a popular time-series metrics platform used for monitoring. + +NetBox exposes metrics at the `/metrics` HTTP endpoint, e.g. `https://netbox.local/metrics`. Metric exposition can be toggled with the `METRICS_ENABLED` configuration setting. Metrics are exposed by default. + +## Metric Types + +NetBox makes use of the [django-prometheus](https://github.com/korfuri/django-prometheus) library to export a number of different types of metrics, including: + +- Per-model insert, update, and delete counters +- Per-view request counters +- Per-view request latency histograms +- Request body size histograms +- Response body size histograms +- Response code counters +- Database connection, execution, and error counters +- Cache hit, miss, and invalidation counters +- Django middleware latency histograms +- Other Django-related metadata metrics + +For the exhaustive list of exposed metrics, visit the `/metrics` endpoint on your NetBox instance.
diff --git a/docs/configuration/optional-settings.md b/docs/configuration/optional-settings.md index ea550c62f..b5cf79749 100644 --- a/docs/configuration/optional-settings.md +++ b/docs/configuration/optional-settings.md @@ -197,6 +197,14 @@ The file path to the location where media files (such as image attachments) are --- +## METRICS_ENABLED + +Default: True + +Toggle exposing Prometheus metrics at `/metrics`. See the [Prometheus Metrics](../additional-features/prometheus-metrics/) documentation for more details. + +--- + ## NAPALM_USERNAME ## NAPALM_PASSWORD diff --git a/mkdocs.yml b/mkdocs.yml index 47e6e0860..bf2d4c0b3 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -37,6 +37,7 @@ pages: - Webhooks: 'additional-features/webhooks.md' - Change Logging: 'additional-features/change-logging.md' - Caching: 'additional-features/caching.md' + - Prometheus Metrics: 'additional-features/prometheus-metrics.md' - Administration: - Replicating NetBox: 'administration/replicating-netbox.md' - NetBox Shell: 'administration/netbox-shell.md' diff --git a/netbox/extras/apps.py b/netbox/extras/apps.py index 6e6083691..6bb3b9fca 100644 --- a/netbox/extras/apps.py +++ b/netbox/extras/apps.py @@ -7,6 +7,9 @@ class ExtrasConfig(AppConfig): name = "extras" def ready(self): + + import extras.signals + # Check that we can connect to the configured Redis database if webhooks are enabled. 
if settings.WEBHOOKS_ENABLED: try: diff --git a/netbox/extras/middleware.py b/netbox/extras/middleware.py index be878918b..025aea8df 100644 --- a/netbox/extras/middleware.py +++ b/netbox/extras/middleware.py @@ -7,6 +7,7 @@ from django.conf import settings from django.db.models.signals import post_delete, post_save from django.utils import timezone from django.utils.functional import curry +from django_prometheus.models import model_deletes, model_inserts, model_updates from extras.webhooks import enqueue_webhooks from .constants import ( @@ -37,15 +38,20 @@ def _record_object_deleted(request, instance, **kwargs): if hasattr(instance, 'log_change'): instance.log_change(request.user, request.id, OBJECTCHANGE_ACTION_DELETE) + # Enqueue webhooks enqueue_webhooks(instance, request.user, request.id, OBJECTCHANGE_ACTION_DELETE) + # Increment metric counters + model_deletes.labels(instance._meta.model_name).inc() + class ObjectChangeMiddleware(object): """ - This middleware performs two functions in response to an object being created, updated, or deleted: + This middleware performs three functions in response to an object being created, updated, or deleted: 1. Create an ObjectChange to reflect the modification to the object in the changelog. 2. Enqueue any relevant webhooks. + 3. Increment metric counter for the event type The post_save and pre_delete signals are employed to catch object modifications, however changes are recorded a bit differently for each. 
Objects being saved are cached into thread-local storage for action *after* the response has
@@ -85,6 +91,12 @@ class ObjectChangeMiddleware(object):
             # Enqueue webhooks
             enqueue_webhooks(obj, request.user, request.id, action)
 
+            # Increment metric counters
+            if action == OBJECTCHANGE_ACTION_CREATE:
+                model_inserts.labels(obj._meta.model_name).inc()
+            elif action == OBJECTCHANGE_ACTION_UPDATE:
+                model_updates.labels(obj._meta.model_name).inc()
+
         # Housekeeping: 1% chance of clearing out expired ObjectChanges
         if _thread_locals.changed_objects and settings.CHANGELOG_RETENTION and random.randint(1, 100) == 1:
             cutoff = timezone.now() - timedelta(days=settings.CHANGELOG_RETENTION)
diff --git a/netbox/extras/signals.py b/netbox/extras/signals.py
new file mode 100644
index 000000000..aa173b437
--- /dev/null
+++ b/netbox/extras/signals.py
@@ -0,0 +1,28 @@
+from cacheops.signals import cache_invalidated, cache_read
+from prometheus_client import Counter
+
+
+# Prometheus counters tracking cacheops cache activity
+cacheops_cache_hit = Counter('cacheops_cache_hit', 'Number of cache hits')
+cacheops_cache_miss = Counter('cacheops_cache_miss', 'Number of cache misses')
+cacheops_cache_invalidated = Counter('cacheops_cache_invalidated', 'Number of cache invalidations')
+
+
+def cache_read_collector(sender, func, hit, **kwargs):
+    """
+    Receiver for the cacheops cache_read signal: count the read as a hit or a miss. Only `hit` is used.
+    """
+    counter = cacheops_cache_hit if hit else cacheops_cache_miss
+    counter.inc()
+
+
+def cache_invalidated_collector(sender, obj_dict, **kwargs):
+    """
+    Receiver for the cacheops cache_invalidated signal: count one invalidation. The arguments are unused.
+    """
+    cacheops_cache_invalidated.inc()
+
+
+# Attach the collectors to the cacheops signals
+cache_read.connect(cache_read_collector)
+cache_invalidated.connect(cache_invalidated_collector)
diff --git a/netbox/netbox/configuration.example.py b/netbox/netbox/configuration.example.py
index adfb8f854..6eda8a666 100644
--- a/netbox/netbox/configuration.example.py
+++ b/netbox/netbox/configuration.example.py
@@ -129,6 +129,9 @@ MAX_PAGE_SIZE = 1000
 # the default value of this setting is derived from the installed location.
# MEDIA_ROOT = '/opt/netbox/netbox/media' +# Expose Prometheus monitoring metrics at the HTTP endpoint '/metrics' +METRICS_ENABLED = True + # Credentials that NetBox will uses to authenticate to devices when connecting via NAPALM. NAPALM_USERNAME = '' NAPALM_PASSWORD = '' diff --git a/netbox/netbox/settings.py b/netbox/netbox/settings.py index 662fb1f67..bbb127f65 100644 --- a/netbox/netbox/settings.py +++ b/netbox/netbox/settings.py @@ -77,6 +77,7 @@ LOGIN_TIMEOUT = getattr(configuration, 'LOGIN_TIMEOUT', None) MAINTENANCE_MODE = getattr(configuration, 'MAINTENANCE_MODE', False) MAX_PAGE_SIZE = getattr(configuration, 'MAX_PAGE_SIZE', 1000) MEDIA_ROOT = getattr(configuration, 'MEDIA_ROOT', os.path.join(BASE_DIR, 'media')).rstrip('/') +METRICS_ENABLED = getattr(configuration, 'METRICS_ENABLED', True) NAPALM_ARGS = getattr(configuration, 'NAPALM_ARGS', {}) NAPALM_PASSWORD = getattr(configuration, 'NAPALM_PASSWORD', '') NAPALM_TIMEOUT = getattr(configuration, 'NAPALM_TIMEOUT', 30) @@ -98,9 +99,14 @@ WEBHOOKS_ENABLED = getattr(configuration, 'WEBHOOKS_ENABLED', False) # # Only PostgreSQL is supported -DATABASE.update({ - 'ENGINE': 'django.db.backends.postgresql' -}) +if METRICS_ENABLED: + DATABASE.update({ + 'ENGINE': 'django_prometheus.db.backends.postgresql' + }) +else: + DATABASE.update({ + 'ENGINE': 'django.db.backends.postgresql' + }) DATABASES = { 'default': DATABASE, @@ -161,6 +167,7 @@ INSTALLED_APPS = [ 'debug_toolbar', 'django_filters', 'django_tables2', + 'django_prometheus', 'mptt', 'rest_framework', 'taggit', @@ -185,6 +192,7 @@ if WEBHOOKS_ENABLED: # Middleware MIDDLEWARE = ( 'debug_toolbar.middleware.DebugToolbarMiddleware', + 'django_prometheus.middleware.PrometheusBeforeMiddleware', 'corsheaders.middleware.CorsMiddleware', 'django.contrib.sessions.middleware.SessionMiddleware', 'django.middleware.common.CommonMiddleware', @@ -197,6 +205,7 @@ MIDDLEWARE = ( 'utilities.middleware.LoginRequiredMiddleware', 'utilities.middleware.APIVersionMiddleware', 
'extras.middleware.ObjectChangeMiddleware',
+    'django_prometheus.middleware.PrometheusAfterMiddleware',
 )
 
 ROOT_URLCONF = 'netbox.urls'
@@ -337,7 +346,9 @@ else:
     REDIS_CACHE_CON_STRING = 'redis://'
 
 if REDIS_PASSWORD:
-    REDIS_CACHE_CON_STRING = '{}@{}'.format(REDIS_PASSWORD, REDIS_CACHE_CON_STRING)
+    # Password-only Redis URLs are written redis://:<password>@<host>. The leading colon is required;
+    # without it the value occupies the username position of the URL and is not treated as the password.
+    REDIS_CACHE_CON_STRING = '{}:{}@'.format(REDIS_CACHE_CON_STRING, REDIS_PASSWORD)
 
 REDIS_CACHE_CON_STRING = '{}{}:{}/{}'.format(REDIS_CACHE_CON_STRING, REDIS_HOST, REDIS_PORT, REDIS_CACHE_DATABASE)
 
@@ -365,6 +376,12 @@ CACHEOPS = {
 CACHEOPS_DEGRADE_ON_FAILURE = True
 
 
+#
+# Django Prometheus
+#
+PROMETHEUS_EXPORT_MIGRATIONS = False
+
+
 #
 # Django filters
 #
diff --git a/netbox/netbox/urls.py b/netbox/netbox/urls.py
index 45c99beb9..aaef05e00 100644
--- a/netbox/netbox/urls.py
+++ b/netbox/netbox/urls.py
@@ -73,6 +73,11 @@ if settings.DEBUG:
         url(r'^__debug__/', include(debug_toolbar.urls)),
     ]
 
+if settings.METRICS_ENABLED:
+    _patterns += [
+        url('', include('django_prometheus.urls')),
+    ]
+
 # Prepend BASE_PATH
 urlpatterns = [
     url(r'^{}'.format(settings.BASE_PATH), include(_patterns))
diff --git a/requirements.txt b/requirements.txt
index 29b2f8f94..948603c46 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,6 +4,7 @@ django-cors-headers==2.5.2
 django-debug-toolbar==1.11
 django-filter==2.1.0
 django-mptt==0.9.1
+django-prometheus==1.0.15
 django-tables2==2.0.6
 django-taggit==1.1.0
 django-taggit-serializer==0.1.7