Optimize bulk caching performance

jeremystretch 2022-10-19 17:29:35 -04:00
parent e07806930f
commit 64cb2cc085
3 changed files with 54 additions and 32 deletions

@@ -63,11 +63,11 @@ class Command(BaseCommand):
             app_label = model._meta.app_label
             model_name = model._meta.model_name
             self.stdout.write(f'Reindexing {app_label}.{model_name}... ', ending='')
-            i = 0
-            for instance in model.objects.all():
-                i += search_backend.cache(instance)
+            i = search_backend.cache(model.objects.iterator())
             if i:
                 self.stdout.write(f'{i} entries cached.')
             else:
                 self.stdout.write(f'None found.')

+        msg = f'Completed.'
+        if total_count := search_backend.size:
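The reindex change above swaps a per-object loop for a single bulk call. Two things make this faster: iterator() streams rows from the database without building the queryset's result cache, and the backend (see the backend changes below) batches its INSERTs. A minimal sketch of the before/after shape, using a hypothetical Device model for illustration:

    # Before: one cache() call, and one INSERT, per object
    for instance in Device.objects.all():
        search_backend.cache(instance)

    # After: one call; rows stream in and are written in bulk batches
    search_backend.cache(Device.objects.iterator())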

@@ -53,7 +53,7 @@ class SearchIndex:
         return str(getattr(instance, field_name))

     @classmethod
-    def to_cache(cls, instance):
+    def to_cache(cls, instance, custom_fields=None):
         values = []

         # Capture built-in fields
@@ -66,8 +66,10 @@ class SearchIndex:
         )

         # Capture custom fields
-        if hasattr(instance, 'custom_field_data'):
-            for cf, value in instance.get_custom_fields().items():
+        if getattr(instance, 'custom_field_data', None):
+            if custom_fields is None:
+                custom_fields = instance.get_custom_fields().keys()
+            for cf in custom_fields:
                 type_ = cf.search_type
+                value = instance.custom_field_data.get(cf.name)
                 weight = cf.search_weight
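The new custom_fields parameter lets a caller resolve a model's custom fields once and reuse them for every instance, rather than having each to_cache() call invoke instance.get_custom_fields() (a database query). When omitted, it falls back to the old per-instance lookup. A rough sketch of the intended calling pattern, with illustrative names (the filter/exclude expression is taken from the backend hunk below):

    # One custom-field query per model, shared across all instances
    cfs = CustomField.objects.filter(content_types=ct).exclude(search_weight=0)
    for instance in queryset:
        values = indexer.to_cache(instance, custom_fields=cfs)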

@@ -8,7 +8,7 @@ from django.db.models import F, Window
 from django.db.models.functions import window
 from django.db.models.signals import post_delete, post_save

-from extras.models import CachedValue
+from extras.models import CachedValue, CustomField
 from extras.registry import registry
 from utilities.querysets import RestrictedPrefetch
 from utilities.templatetags.builtins.filters import bettertitle
@@ -71,7 +71,7 @@ class SearchBackend:
         cls.remove(instance)

     @classmethod
-    def cache(cls, instance):
+    def cache(cls, instance, indexer=None):
         """
         Create or update the cached representation of an instance.
         """
@@ -144,25 +144,37 @@ class CachedValueSearchBackend(SearchBackend):
     ]

     @classmethod
-    def cache(cls, instance):
-        try:
-            indexer = get_indexer(instance)
-        except KeyError:
-            # No indexer has been registered for this model
-            return
+    def cache(cls, instances, indexer=None, remove_existing=True):
+        content_type = None
+        custom_fields = None

-        ct = ContentType.objects.get_for_model(instance)
-        data = indexer.to_cache(instance)
+        # Convert a single instance to an iterable
+        if not hasattr(instances, '__iter__'):
+            instances = [instances]

+        buffer = []
+        counter = 0
+        for instance in instances:

             # Wipe out any previously cached values for the object
+            if remove_existing:
                 cls.remove(instance)

-        # Record any new non-empty values
-        cached_values = []
-        for field in data:
-            cached_values.append(
+            # Determine the indexer
+            if indexer is None:
+                try:
+                    indexer = get_indexer(instance)
+                    content_type = ContentType.objects.get_for_model(indexer.model)
+                    custom_fields = CustomField.objects.filter(content_types=content_type).exclude(search_weight=0)
+                except KeyError:
+                    # No indexer has been registered for this model
+                    continue

+            # Generate cache data
+            for field in indexer.to_cache(instance, custom_fields=custom_fields):
+                buffer.append(
                     CachedValue(
-                        object_type=ct,
+                        object_type=content_type,
                         object_id=instance.pk,
                         field=field.name,
                         type=field.type,
@@ -170,9 +182,17 @@ class CachedValueSearchBackend(SearchBackend):
                         value=field.value
                     )
                 )

-        ret = CachedValue.objects.bulk_create(cached_values)
-        return len(ret)
+            # Check whether the buffer needs to be flushed
+            if len(buffer) >= 2000:
+                counter += len(CachedValue.objects.bulk_create(buffer))
+                buffer = []

+        # Final buffer flush
+        if buffer:
+            counter += len(CachedValue.objects.bulk_create(buffer))

+        return counter

     @classmethod
     def remove(cls, instance):
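The accumulate-and-flush pattern at the heart of this change is generic: buffer rows, write a batch whenever the buffer reaches a threshold (2000 here), and flush the remainder at the end. A self-contained sketch of the same loop, detached from the NetBox models (buffered_bulk_create is an illustrative name; bulk_create stands in for any batch-persist function that returns the created objects):

    BATCH_SIZE = 2000  # flush threshold used in the diff

    def buffered_bulk_create(rows, bulk_create):
        # rows: any iterable; bulk_create: persists a list of objects
        buffer, counter = [], 0
        for row in rows:
            buffer.append(row)
            if len(buffer) >= BATCH_SIZE:
                counter += len(bulk_create(buffer))
                buffer = []
        if buffer:  # final partial batch
            counter += len(bulk_create(buffer))
        return counter

Capping the batch size keeps memory bounded and keeps each INSERT statement a manageable size, at the cost of one extra length check per row.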