Optimize bulk caching performance

This commit is contained in:
jeremystretch 2022-10-19 17:29:35 -04:00
parent e07806930f
commit 64cb2cc085
3 changed files with 54 additions and 32 deletions

View File

@ -63,11 +63,11 @@ class Command(BaseCommand):
app_label = model._meta.app_label app_label = model._meta.app_label
model_name = model._meta.model_name model_name = model._meta.model_name
self.stdout.write(f'Reindexing {app_label}.{model_name}... ', ending='') self.stdout.write(f'Reindexing {app_label}.{model_name}... ', ending='')
i = 0 i = search_backend.cache(model.objects.iterator())
for instance in model.objects.all():
i += search_backend.cache(instance)
if i: if i:
self.stdout.write(f'{i} entries cached.') self.stdout.write(f'{i} entries cached.')
else:
self.stdout.write(f'None found.')
msg = f'Completed.' msg = f'Completed.'
if total_count := search_backend.size: if total_count := search_backend.size:

View File

@ -53,7 +53,7 @@ class SearchIndex:
return str(getattr(instance, field_name)) return str(getattr(instance, field_name))
@classmethod @classmethod
def to_cache(cls, instance): def to_cache(cls, instance, custom_fields=None):
values = [] values = []
# Capture built-in fields # Capture built-in fields
@ -66,8 +66,10 @@ class SearchIndex:
) )
# Capture custom fields # Capture custom fields
if hasattr(instance, 'custom_field_data'): if getattr(instance, 'custom_field_data', None):
for cf, value in instance.get_custom_fields().items(): if custom_fields is None:
custom_fields = instance.get_custom_fields().keys()
for cf in custom_fields:
type_ = cf.search_type type_ = cf.search_type
value = instance.custom_field_data.get(cf.name) value = instance.custom_field_data.get(cf.name)
weight = cf.search_weight weight = cf.search_weight

View File

@ -8,7 +8,7 @@ from django.db.models import F, Window
from django.db.models.functions import window from django.db.models.functions import window
from django.db.models.signals import post_delete, post_save from django.db.models.signals import post_delete, post_save
from extras.models import CachedValue from extras.models import CachedValue, CustomField
from extras.registry import registry from extras.registry import registry
from utilities.querysets import RestrictedPrefetch from utilities.querysets import RestrictedPrefetch
from utilities.templatetags.builtins.filters import bettertitle from utilities.templatetags.builtins.filters import bettertitle
@ -71,7 +71,7 @@ class SearchBackend:
cls.remove(instance) cls.remove(instance)
@classmethod @classmethod
def cache(cls, instance): def cache(cls, instance, indexer=None):
""" """
Create or update the cached representation of an instance. Create or update the cached representation of an instance.
""" """
@ -144,25 +144,37 @@ class CachedValueSearchBackend(SearchBackend):
] ]
@classmethod @classmethod
def cache(cls, instance): def cache(cls, instances, indexer=None, remove_existing=True):
try: content_type = None
indexer = get_indexer(instance) custom_fields = None
except KeyError:
# No indexer has been registered for this model
return
ct = ContentType.objects.get_for_model(instance) # Convert a single instance to an iterable
data = indexer.to_cache(instance) if not hasattr(instances, '__iter__'):
instances = [instances]
buffer = []
counter = 0
for instance in instances:
# Wipe out any previously cached values for the object # Wipe out any previously cached values for the object
if remove_existing:
cls.remove(instance) cls.remove(instance)
# Record any new non-empty values # Determine the indexer
cached_values = [] if indexer is None:
for field in data: try:
cached_values.append( indexer = get_indexer(instance)
content_type = ContentType.objects.get_for_model(indexer.model)
custom_fields = CustomField.objects.filter(content_types=content_type).exclude(search_weight=0)
except KeyError:
# No indexer has been registered for this model
continue
# Generate cache data
for field in indexer.to_cache(instance, custom_fields=custom_fields):
buffer.append(
CachedValue( CachedValue(
object_type=ct, object_type=content_type,
object_id=instance.pk, object_id=instance.pk,
field=field.name, field=field.name,
type=field.type, type=field.type,
@ -170,9 +182,17 @@ class CachedValueSearchBackend(SearchBackend):
value=field.value value=field.value
) )
) )
ret = CachedValue.objects.bulk_create(cached_values)
return len(ret) # Check whether the buffer needs to be flushed
if len(buffer) >= 2000:
counter += len(CachedValue.objects.bulk_create(buffer))
buffer = []
# Final buffer flush
if buffer:
counter += len(CachedValue.objects.bulk_create(buffer))
return counter
@classmethod @classmethod
def remove(cls, instance): def remove(cls, instance):