From acdf4bbefe14e1eb7c92d0b808b698854521c11d Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Tue, 10 Feb 2015 16:36:50 +0000 Subject: [PATCH 1/4] Added get_searchable_content method to all block types --- wagtail/wagtailcore/blocks.py | 34 ++++++++++ wagtail/wagtailcore/tests/test_blocks.py | 80 ++++++++++++++++++++++++ 2 files changed, 114 insertions(+) diff --git a/wagtail/wagtailcore/blocks.py b/wagtail/wagtailcore/blocks.py index 8d45a9df2..c08c424bb 100644 --- a/wagtail/wagtailcore/blocks.py +++ b/wagtail/wagtailcore/blocks.py @@ -225,6 +225,12 @@ class Block(six.with_metaclass(BaseBlock, object)): """ return force_text(value) + def get_searchable_content(self, value): + """ + Returns a list of strings containing text content within this block to be used in a search engine. + """ + return [] + def __eq__(self, other): """ The deep_deconstruct method in django.db.migrations.autodetector.MigrationAutodetector does not @@ -347,6 +353,10 @@ class FieldBlock(Block): def clean(self, value): return self.field.clean(value) + def get_searchable_content(self, value): + return [value] + + class CharBlock(FieldBlock): def __init__(self, required=True, help_text=None, max_length=None, min_length=None, **kwargs): # TODO: decide what to do about 'label' and 'initial' parameters to the form field @@ -542,6 +552,14 @@ class BaseStructBlock(Block): for name, val in value.items() ]) + def get_searchable_content(self, value): + content = [] + + for name, block in self.child_blocks.items(): + content.extend(block.get_searchable_content(value.get(name, block.meta.default))) + + return content + def deconstruct(self): """ Always deconstruct StructBlock instances as if they were plain StructBlocks with all of the @@ -747,6 +765,14 @@ class ListBlock(Block): ) return format_html("", children) + def get_searchable_content(self, value): + content = [] + + for child_value in value: + content.extend(self.child_block.get_searchable_content(child_value)) + + return content + # =========== # StreamBlock @@ -925,6 +951,14 @@ class BaseStreamBlock(Block): [(child, child.block_type) for child in value] ) + def get_searchable_content(self, value): + content = [] + + for child in value: + content.extend(child.block.get_searchable_content(child.value)) + + return content + def deconstruct(self): """ Always deconstruct StreamBlock instances as if they were plain StreamBlocks with all of the diff --git a/wagtail/wagtailcore/tests/test_blocks.py b/wagtail/wagtailcore/tests/test_blocks.py index 6d9fd0115..06186633d 100644 --- a/wagtail/wagtailcore/tests/test_blocks.py +++ b/wagtail/wagtailcore/tests/test_blocks.py @@ -35,6 +35,12 @@ class TestFieldBlock(unittest.TestCase): self.assertIn('This field is required.', html) + def test_charfield_searchable_content(self): + block = blocks.CharBlock() + content = block.get_searchable_content("Hello world!") + + self.assertEqual(content, ["Hello world!"]) + def test_choicefield_render(self): class ChoiceBlock(blocks.FieldBlock): field = forms.ChoiceField(choices=( @@ -62,6 +68,19 @@ class TestFieldBlock(unittest.TestCase): self.assertIn('', html) self.assertIn('', html) + @unittest.expectedFailure # Returning "choice-1" instead of "Choice 1" + def test_choicefield_searchable_content(self): + class ChoiceBlock(blocks.FieldBlock): + field = forms.ChoiceField(choices=( + ('choice-1', "Choice 1"), + ('choice-2', "Choice 2"), + )) + + block = ChoiceBlock() + content = block.get_searchable_content("choice-1") + + self.assertEqual(content, ["Choice 1"]) + class TestMeta(unittest.TestCase): def test_set_template_with_meta(self): @@ -262,6 +281,19 @@ class TestStructBlock(unittest.TestCase): block = LinkBlock() self.assertIn('', block.all_html_declarations()) + def test_searchable_content(self): + class LinkBlock(blocks.StructBlock): + title = blocks.CharBlock() + link = blocks.URLBlock() + + block = LinkBlock() + content = block.get_searchable_content({ + 'title': "Wagtail site", + 'link': 'http://www.wagtail.io', + }) + + self.assertEqual(content, ["Wagtail site", "http://www.wagtail.io"]) + class TestListBlock(unittest.TestCase): def test_initialise_with_class(self): @@ -398,6 +430,25 @@ class TestListBlock(unittest.TestCase): block = blocks.ListBlock(CharBlockWithDeclarations()) self.assertIn('', block.all_html_declarations()) + def test_searchable_content(self): + class LinkBlock(blocks.StructBlock): + title = blocks.CharBlock() + link = blocks.URLBlock() + + block = blocks.ListBlock(LinkBlock()) + content = block.get_searchable_content([ + { + 'title': "Wagtail", + 'link': 'http://www.wagtail.io', + }, + { + 'title': "Django", + 'link': 'http://www.djangoproject.com', + }, + ]) + + self.assertEqual(content, ["Wagtail", "http://www.wagtail.io", "Django", "http://www.djangoproject.com"]) + class TestStreamBlock(unittest.TestCase): def test_initialisation(self): @@ -646,3 +697,32 @@ class TestStreamBlock(unittest.TestCase): block_value = block.value_from_datadict(post_data, {}, 'article') self.assertEqual(block_value[2].value, "heading 2") + + def test_searchable_content(self): + class ArticleBlock(blocks.StreamBlock): + heading = blocks.CharBlock() + paragraph = blocks.CharBlock() + + block = ArticleBlock() + value = block.to_python([ + { + 'type': 'heading', + 'value': "My title", + }, + { + 'type': 'paragraph', + 'value': 'My first paragraph', + }, + { + 'type': 'paragraph', + 'value': 'My second paragraph', + }, + ]) + + content = block.get_searchable_content(value) + + self.assertEqual(content, [ + "My title", + "My first paragraph", + "My second paragraph", + ]) From 3843123e41114b7060b3d44f8d0dd329a51236e2 Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Tue, 10 Feb 2015 16:50:32 +0000 Subject: [PATCH 2/4] Hooked blocks get_searchable_content into wagtailsearch --- wagtail/wagtailcore/fields.py | 3 +++ wagtail/wagtailsearch/index.py | 5 ++++- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/wagtail/wagtailcore/fields.py b/wagtail/wagtailcore/fields.py index ede90dacd..d139a3683 100644 --- a/wagtail/wagtailcore/fields.py +++ b/wagtail/wagtailcore/fields.py @@ -82,3 +82,6 @@ class StreamField(with_metaclass(models.SubfieldBase, models.Field)): def value_to_string(self, obj): value = self._get_val_from_obj(obj) return self.get_prep_value(value) + + def get_searchable_content(self, value): + return self.stream_block.get_searchable_content(value) diff --git a/wagtail/wagtailsearch/index.py b/wagtail/wagtailsearch/index.py index bbe9dbf6a..fa2f317ce 100644 --- a/wagtail/wagtailsearch/index.py +++ b/wagtail/wagtailsearch/index.py @@ -116,7 +116,10 @@ class BaseField(object): def get_value(self, obj): try: field = self.get_field(obj.__class__) - return field._get_val_from_obj(obj) + value = field._get_val_from_obj(obj) + if hasattr(field, 'get_searchable_content'): + value = field.get_searchable_content(value) + return value except models.fields.FieldDoesNotExist: value = getattr(obj, self.field_name, None) if hasattr(value, '__call__'): From 23c632b95a67691c809d8cadafe80cb827513eff Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Wed, 18 Feb 2015 13:42:46 +0000 Subject: [PATCH 3/4] Only give searchable content for text fields --- wagtail/wagtailcore/blocks.py | 12 +++++++++--- wagtail/wagtailcore/tests/test_blocks.py | 4 ++-- 2 files changed, 11 insertions(+), 5 deletions(-) diff --git a/wagtail/wagtailcore/blocks.py b/wagtail/wagtailcore/blocks.py index c08c424bb..b476e6eb3 100644 --- a/wagtail/wagtailcore/blocks.py +++ b/wagtail/wagtailcore/blocks.py @@ -353,9 +353,6 @@ class FieldBlock(Block): def clean(self, value): return self.field.clean(value) - def get_searchable_content(self, value): - return [value] - class CharBlock(FieldBlock): def __init__(self, required=True, help_text=None, max_length=None, min_length=None, **kwargs): @@ -363,11 +360,16 @@ class CharBlock(FieldBlock): self.field = forms.CharField(required=required, help_text=help_text, max_length=max_length, min_length=min_length) super(CharBlock, self).__init__(**kwargs) + def get_searchable_content(self, value): + return [value] + + class URLBlock(FieldBlock): def __init__(self, required=True, help_text=None, max_length=None, min_length=None, **kwargs): self.field = forms.URLField(required=required, help_text=help_text, max_length=max_length, min_length=min_length) super(URLBlock, self).__init__(**kwargs) + class RichTextBlock(FieldBlock): @cached_property def field(self): @@ -377,6 +379,10 @@ class RichTextBlock(FieldBlock): def render_basic(self, value): return mark_safe('
' + expand_db_html(value) + '
') + def get_searchable_content(self, value): + return [value] + + class RawHTMLBlock(FieldBlock): def __init__(self, required=True, help_text=None, max_length=None, min_length=None, **kwargs): self.field = forms.CharField( diff --git a/wagtail/wagtailcore/tests/test_blocks.py b/wagtail/wagtailcore/tests/test_blocks.py index 06186633d..17bf1b81e 100644 --- a/wagtail/wagtailcore/tests/test_blocks.py +++ b/wagtail/wagtailcore/tests/test_blocks.py @@ -292,7 +292,7 @@ class TestStructBlock(unittest.TestCase): 'link': 'http://www.wagtail.io', }) - self.assertEqual(content, ["Wagtail site", "http://www.wagtail.io"]) + self.assertEqual(content, ["Wagtail site"]) class TestListBlock(unittest.TestCase): @@ -447,7 +447,7 @@ class TestListBlock(unittest.TestCase): }, ]) - self.assertEqual(content, ["Wagtail", "http://www.wagtail.io", "Django", "http://www.djangoproject.com"]) + self.assertEqual(content, ["Wagtail", "Django"]) class TestStreamBlock(unittest.TestCase): From 02e6e55f3e1cb0d40bd519081455a278f7e5737d Mon Sep 17 00:00:00 2001 From: Karl Hobley Date: Wed, 18 Feb 2015 13:43:47 +0000 Subject: [PATCH 4/4] Run force_text on searchable text --- wagtail/wagtailcore/blocks.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/wagtail/wagtailcore/blocks.py b/wagtail/wagtailcore/blocks.py index b476e6eb3..d977056ec 100644 --- a/wagtail/wagtailcore/blocks.py +++ b/wagtail/wagtailcore/blocks.py @@ -361,7 +361,7 @@ class CharBlock(FieldBlock): super(CharBlock, self).__init__(**kwargs) def get_searchable_content(self, value): - return [value] + return [force_text(value)] class URLBlock(FieldBlock): @@ -380,7 +380,7 @@ class RichTextBlock(FieldBlock): return mark_safe('
' + expand_db_html(value) + '
') def get_searchable_content(self, value): - return [value] + return [force_text(value)] class RawHTMLBlock(FieldBlock):