From 3a69bea8f3a2dfa86e97b25d852cd2fe5dc97ee4 Mon Sep 17 00:00:00 2001
From: Cristopher Hernandez <22552070+CristopherH95@users.noreply.github.com>
Date: Sun, 24 Oct 2021 16:24:22 -0700
Subject: [PATCH] Add cleanup for deleted object entries in buildwatson

---
 tests/test_watson/tests.py                | 14 ++++++++++++
 watson/management/commands/buildwatson.py |  2 ++
 watson/search.py                          | 26 +++++++++++++++++++++++
 3 files changed, 42 insertions(+)

diff --git a/tests/test_watson/tests.py b/tests/test_watson/tests.py
index 6d45704..b9682fb 100644
--- a/tests/test_watson/tests.py
+++ b/tests/test_watson/tests.py
@@ -24,6 +24,7 @@ from django.contrib.auth.models import User
 from django import template
 from django.utils.encoding import force_str
 from django.db.models import Case, When, Value, IntegerField
+from django.db import connection
 
 from watson import search as watson
 from watson.models import SearchEntry
@@ -186,6 +187,19 @@ class InternalsTest(SearchTestBase):
         self.assertEqual(watson.search("fooo1").count(), 1)
         self.assertEqual(watson.search("fooo2").count(), 1)
         self.assertEqual(watson.search("fooo3").count(), 1)
+        # Delete the row with raw SQL (no signals are triggered) so that buildwatson's cleanup can be tested.
+        opts = WatsonTestModel1._meta
+        with connection.cursor() as cursor:
+            cursor.execute(
+                'DELETE FROM ' + opts.db_table + ' WHERE ' + opts.pk.column + ' = %s',
+                [self.test11.id]
+            )
+        # Run the rebuild command again.
+        call_command("buildwatson", verbosity=0)
+        # Test that the deleted object is now gone, but the other objects can still be found.
+        self.assertEqual(watson.search("fooo1").count(), 0)
+        self.assertEqual(watson.search("fooo2").count(), 1)
+        self.assertEqual(watson.search("fooo3").count(), 1)
 
     def testUpdateSearchIndex(self):
         # Update a model and make sure that the search results match.
diff --git a/watson/management/commands/buildwatson.py b/watson/management/commands/buildwatson.py
index 895abf9..4488225 100644
--- a/watson/management/commands/buildwatson.py
+++ b/watson/management/commands/buildwatson.py
@@ -66,9 +66,11 @@ def rebuild_index_for_model(model_, engine_slug_, verbosity_, slim_=False, batch
                     )
                 )
     if non_atomic_:
+        search_engine_.cleanup_model_index(model_)
         _bulk_save_search_entries(iter_search_entries(), batch_size=batch_size_)
     else:
         with transaction.atomic():
+            search_engine_.cleanup_model_index(model_)
             _bulk_save_search_entries(iter_search_entries(), batch_size=batch_size_)
     return local_refreshed_model_count[0]
 
diff --git a/watson/search.py b/watson/search.py
index 41d3584..68dde38 100644
--- a/watson/search.py
+++ b/watson/search.py
@@ -15,6 +15,7 @@ from django.core.exceptions import ImproperlyConfigured, ObjectDoesNotExist
 from django.db import models, connections, router
 from django.db.models import Q
 from django.db.models.expressions import RawSQL
+from django.db.models.functions import Coalesce
 from django.db.models.query import QuerySet
 from django.db.models.signals import post_save, pre_delete
 from django.utils.encoding import force_str
@@ -442,6 +443,26 @@ class SearchEngine(object):
             model=model,
         ))
 
+    def _get_deleted_entries_for_model(self, model):
+        """Returns a queryset of this engine's entries whose objects no longer exist for the given model."""
+        from django.contrib.contenttypes.models import ContentType
+        from watson.models import SearchEntry, has_int_pk
+        content_type = ContentType.objects.get_for_model(model)
+        # primary keys still present for the model (not every model exposes an `objects` manager)
+        lookup_subquery = models.Subquery(
+            model._default_manager.all().values('pk')
+        )
+        # map the lookup to the id field that stores this model's primary key
+        if has_int_pk(model):
+            id_lookup = {'object_id_int__in': lookup_subquery}
+        else:
+            id_lookup = {'object_id__in': lookup_subquery}
+        return SearchEntry.objects.filter(
+            Q(content_type=content_type) &
+            Q(engine_slug=self._engine_slug) &
+            ~Q(**id_lookup)
+        )
+
     def _get_entries_for_obj(self, obj):
         """Returns a queryset of entries associate with the given obj."""
         from django.contrib.contenttypes.models import ContentType
@@ -501,6 +522,11 @@ class SearchEngine(object):
             # Oh no! Somehow we've got duplicated search entries!
             search_entries.exclude(id=search_entries[0].id).delete()
 
+    def cleanup_model_index(self, model):
+        """Removes this engine's search entries whose objects no longer exist for the given model."""
+        stale_entries = self._get_deleted_entries_for_model(model)
+        stale_entries.delete()
+
     def update_obj_index(self, obj):
         """Updates the search index for the given obj."""
         _bulk_save_search_entries(list(self._update_obj_index_iter(obj)))