diff --git a/docs/integrations/rest-api.md b/docs/integrations/rest-api.md index bac984a08..74b421456 100644 --- a/docs/integrations/rest-api.md +++ b/docs/integrations/rest-api.md @@ -341,7 +341,7 @@ When retrieving devices and virtual machines via the REST API, each will include ## Pagination -API responses which contain a list of many objects will be paginated for efficiency. The root JSON object returned by a list endpoint contains the following attributes: +API responses which contain a list of many objects will be paginated for efficiency. NetBox employs offset-based pagination by default, which forms a page by skipping the number of objects indicated by the `offset` URL parameter. The root JSON object returned by a list endpoint contains the following attributes: * `count`: The total number of all objects matching the query * `next`: A hyperlink to the next page of results (if applicable) @@ -398,6 +398,49 @@ The maximum number of objects that can be returned is limited by the [`MAX_PAGE_ !!! warning Disabling the page size limit introduces a potential for very resource-intensive requests, since one API request can effectively retrieve an entire table from the database. +### Cursor-Based Pagination + +For large datasets, offset-based pagination can become inefficient because the database must scan all rows up to the offset. As an alternative, cursor-based pagination uses the `start` query parameter to filter results by primary key (PK), enabling efficient keyset pagination. + +To use cursor-based pagination, pass `start` (the minimum PK value) and `limit` (the page size): + +``` +http://netbox/api/dcim/devices/?start=0&limit=100 +``` + +This returns objects with an `id` greater than or equal to zero, ordered by PK, limited to 100 results. Below is an example showing an arbitrary `start` value. 
+ +```json +{ + "count": null, + "next": "http://netbox/api/dcim/devices/?start=357&limit=100", + "previous": null, + "results": [ + { + "id": 109, + "name": "dist-router07", + ... + }, + ... + { + "id": 356, + "name": "acc-switch492", + ... + } + ] +} +``` + +To iterate through all results, use the `id` of the last object in each response plus one as the `start` value for the next request. Continue until `next` is null. + +!!! info + Some important differences from offset-based pagination: + + * `start` and `offset` are **mutually exclusive**; specifying both will result in a 400 error. + * Results are always ordered by primary key when using `start`. This is required to ensure deterministic behavior. + * `count` is always `null` in cursor mode, as counting all matching rows would partially negate its performance benefit. + * `previous` is always `null`: cursor-based pagination supports only forward navigation. + ## Interacting with Objects ### Retrieving Multiple Objects diff --git a/netbox/netbox/api/pagination.py b/netbox/netbox/api/pagination.py index 15fb49da8..eb0295662 100644 --- a/netbox/netbox/api/pagination.py +++ b/netbox/netbox/api/pagination.py @@ -1,18 +1,40 @@ from django.db.models import QuerySet +from django.utils.translation import gettext_lazy as _ +from rest_framework.exceptions import ValidationError from rest_framework.pagination import LimitOffsetPagination +from rest_framework.utils.urls import remove_query_param, replace_query_param from netbox.api.exceptions import QuerySetNotOrdered from netbox.config import get_config -class OptionalLimitOffsetPagination(LimitOffsetPagination): +class NetBoxPagination(LimitOffsetPagination): """ - Override the stock paginator to allow setting limit=0 to disable pagination for a request. This returns all objects - matching a query, but retains the same format as a paginated request. The limit can only be disabled if - MAX_PAGE_SIZE has been set to 0 or None. 
+ Provides two mutually exclusive pagination mechanisms: offset-based and cursor-based. + + Offset-based pagination employs `offset` and (optionally) `limit` parameters to page through results following the + model's natural order. `offset` indicates the number of results to skip. This provides very human-friendly behavior, + but performance can suffer when querying very large data sets due to the overhead required to determine the starting + point in the database. + + Cursor-based pagination employs `start` and (optionally) `limit` parameters to page through results as ordered by + the model's primary key (i.e. `id`). `start` indicates the numeric ID of the first object to return; `limit` + indicates the maximum number of objects to return beginning with the specified ID. Objects *must* be ordered by ID + to ensure pagination is consistent. This approach is less human-friendly but offers superior performance to + offset-based pagination. In cursor mode, `count` is omitted (null) for performance. + + Offset- and cursor-based pagination are mutually exclusive: Only `offset` _or_ `start` is permitted for a request. + + `limit` may be set to zero (`?limit=0`). This returns all objects matching a query, but retains the same format as + a paginated request. The limit can only be disabled if `MAX_PAGE_SIZE` has been set to 0 or None. """ + start_query_param = 'start' + def __init__(self): self.default_limit = get_config().PAGINATE_COUNT + self.start = None + self._page_length = 0 + self._last_pk = None def paginate_queryset(self, queryset, request, view=None): @@ -22,15 +44,42 @@ class OptionalLimitOffsetPagination(LimitOffsetPagination): "ordering has been applied to the queryset for this API endpoint." 
) + self.start = self.get_start(request) + self.limit = self.get_limit(request) + self.request = request + + # Cursor-based pagination + if self.start is not None: + if self.offset_query_param in request.query_params: + raise ValidationError( + _("'{start_param}' and '{offset_param}' are mutually exclusive.").format( + start_param=self.start_query_param, + offset_param=self.offset_query_param, + ) + ) + if 'ordering' in request.query_params: + raise ValidationError(_("Ordering cannot be specified in conjunction with cursor-based pagination.")) + + self.count = None + self.offset = 0 + + queryset = queryset.filter(pk__gte=self.start).order_by('pk') + results = list(queryset[:self.limit]) if self.limit else list(queryset) + + self._page_length = len(results) + if results: + self._last_pk = results[-1].pk if hasattr(results[-1], 'pk') else results[-1]['pk'] + + return results + + # Offset-based pagination if isinstance(queryset, QuerySet): self.count = self.get_queryset_count(queryset) else: # We're dealing with an iterable, not a QuerySet self.count = len(queryset) - self.limit = self.get_limit(request) self.offset = self.get_offset(request) - self.request = request if self.limit and self.count > self.limit and self.template is not None: self.display_page_controls = True @@ -42,6 +91,25 @@ class OptionalLimitOffsetPagination(LimitOffsetPagination): return list(queryset[self.offset:self.offset + self.limit]) return list(queryset[self.offset:]) + def get_start(self, request): + try: + value = int(request.query_params[self.start_query_param]) + if value < 0: + raise ValidationError( + _("Invalid '{param}' parameter: must be a non-negative integer.").format( + param=self.start_query_param, + ) + ) + return value + except KeyError: + return None + except (ValueError, TypeError): + raise ValidationError( + _("Invalid '{param}' parameter: must be a non-negative integer.").format( + param=self.start_query_param, + ) + ) + def get_limit(self, request): max_limit = 
self.default_limit MAX_PAGE_SIZE = get_config().MAX_PAGE_SIZE @@ -75,6 +143,16 @@ class OptionalLimitOffsetPagination(LimitOffsetPagination): if not self.limit: return None + # Cursor mode + if self.start is not None: + if self._page_length < self.limit: + return None + url = self.request.build_absolute_uri() + url = replace_query_param(url, self.start_query_param, self._last_pk + 1) + url = replace_query_param(url, self.limit_query_param, self.limit) + url = remove_query_param(url, self.offset_query_param) + return url + return super().get_next_link() def get_previous_link(self): @@ -83,10 +161,30 @@ class OptionalLimitOffsetPagination(LimitOffsetPagination): if not self.limit: return None + # Cursor mode: forward-only + if self.start is not None: + return None + return super().get_previous_link() + def get_schema_operation_parameters(self, view): + parameters = super().get_schema_operation_parameters(view) + parameters.append({ + 'name': self.start_query_param, + 'required': False, + 'in': 'query', + 'description': ( + 'Cursor-based pagination: return results with pk >= start, ordered by pk. ' + 'Mutually exclusive with offset.' + ), + 'schema': { + 'type': 'integer', + }, + }) + return parameters -class StripCountAnnotationsPaginator(OptionalLimitOffsetPagination): + +class StripCountAnnotationsPaginator(NetBoxPagination): """ Strips the annotations on the queryset before getting the count to optimize pagination of complex queries. 
diff --git a/netbox/netbox/settings.py b/netbox/netbox/settings.py index de741d9a3..7b9a5567c 100644 --- a/netbox/netbox/settings.py +++ b/netbox/netbox/settings.py @@ -724,7 +724,7 @@ REST_FRAMEWORK = { 'rest_framework.filters.OrderingFilter', ), 'DEFAULT_METADATA_CLASS': 'netbox.api.metadata.BulkOperationMetadata', - 'DEFAULT_PAGINATION_CLASS': 'netbox.api.pagination.OptionalLimitOffsetPagination', + 'DEFAULT_PAGINATION_CLASS': 'netbox.api.pagination.NetBoxPagination', 'DEFAULT_PARSER_CLASSES': ( 'rest_framework.parsers.JSONParser', 'rest_framework.parsers.MultiPartParser', diff --git a/netbox/netbox/tests/test_api.py b/netbox/netbox/tests/test_api.py index 5cc2bf060..0cff906a4 100644 --- a/netbox/netbox/tests/test_api.py +++ b/netbox/netbox/tests/test_api.py @@ -2,10 +2,11 @@ import uuid from django.test import RequestFactory, TestCase from django.urls import reverse +from rest_framework.exceptions import ValidationError from rest_framework.request import Request from netbox.api.exceptions import QuerySetNotOrdered -from netbox.api.pagination import OptionalLimitOffsetPagination +from netbox.api.pagination import NetBoxPagination from users.models import Token from utilities.testing import APITestCase @@ -48,7 +49,7 @@ class AppTest(APITestCase): class OptionalLimitOffsetPaginationTest(TestCase): def setUp(self): - self.paginator = OptionalLimitOffsetPagination() + self.paginator = NetBoxPagination() self.factory = RequestFactory() def _make_drf_request(self, path='/', query_params=None): @@ -80,3 +81,33 @@ class OptionalLimitOffsetPaginationTest(TestCase): request = self._make_drf_request() self.paginator.paginate_queryset(iterable, request) # Should not raise exception + + def test_get_start_returns_none_when_absent(self): + """get_start() returns None when start param is not in the request""" + request = self._make_drf_request() + self.assertIsNone(self.paginator.get_start(request)) + + def test_get_start_returns_integer(self): + """get_start() returns an 
integer when start param is present""" + request = self._make_drf_request(query_params={'start': '42'}) + self.assertEqual(self.paginator.get_start(request), 42) + + def test_get_start_raises_for_negative(self): + """get_start() raises ValidationError for negative values""" + request = self._make_drf_request(query_params={'start': '-1'}) + with self.assertRaises(ValidationError): + self.paginator.get_start(request) + + def test_cursor_and_offset_conflict_raises_validation_error(self): + """paginate_queryset() raises ValidationError when both start and offset are specified""" + queryset = Token.objects.all().order_by('created') + request = self._make_drf_request(query_params={'start': '1', 'offset': '10'}) + with self.assertRaises(ValidationError): + self.paginator.paginate_queryset(queryset, request) + + def test_cursor_and_ordering_conflict_raises_validation_error(self): + """paginate_queryset() raises ValidationError when both start and ordering are specified""" + queryset = Token.objects.all().order_by('created') + request = self._make_drf_request(query_params={'start': '1', 'ordering': 'created'}) + with self.assertRaises(ValidationError): + self.paginator.paginate_queryset(queryset, request) diff --git a/netbox/utilities/tests/test_api.py b/netbox/utilities/tests/test_api.py index 885f71a73..290e98f1e 100644 --- a/netbox/utilities/tests/test_api.py +++ b/netbox/utilities/tests/test_api.py @@ -187,6 +187,116 @@ class APIPaginationTestCase(APITestCase): self.assertIsNone(response.data['previous']) self.assertEqual(len(response.data['results']), 100) + def test_cursor_pagination(self): + """Basic cursor pagination returns results ordered by PK with correct next link.""" + first_pk = Site.objects.order_by('pk').values_list('pk', flat=True).first() + response = self.client.get(f'{self.url}?start={first_pk}&limit=10', format='json', **self.header) + + self.assertHttpStatus(response, status.HTTP_200_OK) + self.assertIsNone(response.data['count']) + 
self.assertIsNone(response.data['previous']) + self.assertEqual(len(response.data['results']), 10) + + # Results should be ordered by PK + pks = [r['id'] for r in response.data['results']] + self.assertEqual(pks, sorted(pks)) + + # Next link should use start parameter + last_pk = pks[-1] + self.assertIn(f'start={last_pk + 1}', response.data['next']) + self.assertIn('limit=10', response.data['next']) + + def test_cursor_pagination_last_page(self): + """Cursor pagination returns null next link when fewer results than limit.""" + last_pk = Site.objects.order_by('pk').values_list('pk', flat=True).last() + response = self.client.get(f'{self.url}?start={last_pk}&limit=10', format='json', **self.header) + + self.assertHttpStatus(response, status.HTTP_200_OK) + self.assertEqual(len(response.data['results']), 1) + self.assertIsNone(response.data['next']) + self.assertIsNone(response.data['previous']) + + def test_cursor_pagination_no_results(self): + """Cursor pagination beyond all PKs returns empty results.""" + max_pk = Site.objects.order_by('pk').values_list('pk', flat=True).last() + response = self.client.get(f'{self.url}?start={max_pk + 1000}&limit=10', format='json', **self.header) + + self.assertHttpStatus(response, status.HTTP_200_OK) + self.assertEqual(len(response.data['results']), 0) + self.assertIsNone(response.data['next']) + + def test_cursor_and_offset_conflict(self): + """Specifying both start and offset returns a 400 error.""" + with disable_warnings('django.request'): + response = self.client.get(f'{self.url}?start=1&offset=10', format='json', **self.header) + self.assertHttpStatus(response, status.HTTP_400_BAD_REQUEST) + + def test_cursor_and_ordering_conflict(self): + """Specifying both start and ordering returns a 400 error.""" + with disable_warnings('django.request'): + response = self.client.get(f'{self.url}?start=1&ordering=name', format='json', **self.header) + self.assertHttpStatus(response, status.HTTP_400_BAD_REQUEST) + + def 
test_cursor_negative_start(self): + """Negative start value returns a 400 error.""" + with disable_warnings('django.request'): + response = self.client.get(f'{self.url}?start=-1', format='json', **self.header) + self.assertHttpStatus(response, status.HTTP_400_BAD_REQUEST) + + def test_cursor_with_filters(self): + """Cursor pagination works alongside other query filters.""" + response = self.client.get(f'{self.url}?start=0&limit=10&name=Site 1', format='json', **self.header) + + self.assertHttpStatus(response, status.HTTP_200_OK) + self.assertIsNone(response.data['count']) + results = response.data['results'] + self.assertEqual(len(results), 1) + self.assertEqual(results[0]['name'], 'Site 1') + + def test_offset_multi_page_traversal(self): + """Traverse all 100 objects using offset pagination and verify complete, non-overlapping coverage.""" + collected_pks = [] + url = f'{self.url}?limit=10' + + while url: + response = self.client.get(url, format='json', **self.header) + self.assertHttpStatus(response, status.HTTP_200_OK) + self.assertEqual(response.data['count'], 100) + collected_pks.extend(r['id'] for r in response.data['results']) + url = response.data['next'] + + # Should have collected exactly 100 unique objects + self.assertEqual(len(set(collected_pks)), 100) + + def test_cursor_multi_page_traversal(self): + """Traverse all 100 objects using cursor pagination and verify complete, non-overlapping coverage.""" + collected_pks = [] + first_pk = Site.objects.order_by('pk').values_list('pk', flat=True).first() + url = f'{self.url}?start={first_pk}&limit=10' + + while url: + response = self.client.get(url, format='json', **self.header) + self.assertHttpStatus(response, status.HTTP_200_OK) + self.assertIsNone(response.data['count']) + self.assertIsNone(response.data['previous']) + + page_pks = [r['id'] for r in response.data['results']] + + # Each page should be ordered by PK + self.assertEqual(page_pks, sorted(page_pks)) + + # No overlap with previously collected 
PKs + self.assertFalse(set(page_pks) & set(collected_pks)) + + collected_pks.extend(page_pks) + url = response.data['next'] + + # Should have collected exactly 100 unique objects + self.assertEqual(len(set(collected_pks)), 100) + + # Full result set should be in PK order + self.assertEqual(collected_pks, sorted(collected_pks)) + class APIOrderingTestCase(APITestCase): user_permissions = ('dcim.view_site',)