diff --git a/wagtail/wagtailcore/tests/test_whitelist.py b/wagtail/wagtailcore/tests/test_whitelist.py new file mode 100644 index 000000000..28b63d9fb --- /dev/null +++ b/wagtail/wagtailcore/tests/test_whitelist.py @@ -0,0 +1,136 @@ +from bs4 import BeautifulSoup, NavigableString + +from django.test import TestCase +from wagtail.wagtailcore.whitelist import ( + check_url, + attribute_rule, + allow_without_attributes, + Whitelister +) + +class TestCheckUrl(TestCase): + def test_allowed_url_schemes(self): + for url_scheme in ['', 'http', 'https', 'ftp', 'mailto', 'tel']: + url = url_scheme + "://www.example.com" + self.assertTrue(bool(check_url(url))) + + def test_disallowed_url_scheme(self): + self.assertFalse(bool(check_url("invalid://url"))) + + +class TestAttributeRule(TestCase): + def setUp(self): + self.soup = BeautifulSoup('baz') + + def test_no_rule_for_attr(self): + """ + Test that attribute_rule() drops attributes for + which no rule has been defined. + """ + tag = self.soup.b + fn = attribute_rule({'snowman': 'barbecue'}) + fn(tag) + self.assertEqual(str(tag), 'baz') + + def test_rule_true_for_attr(self): + """ + Test that attribute_rule() does not change attributes + when the corresponding rule is True. + """ + tag = self.soup.b + fn = attribute_rule({'foo': True}) + fn(tag) + self.assertEqual(str(tag), 'baz') + + def test_rule_false_for_attr(self): + """ + Test that attribute_rule() drops attributes + when the corresponding rule is False. + """ + tag = self.soup.b + fn = attribute_rule({'foo': False}) + fn(tag) + self.assertEqual(str(tag), 'baz') + + def test_callable_called_on_attr(self): + """ + Test that when the rule is a callable, + attribute_rule() replaces the attribute with + the result of calling the callable on the attribute. 
+ """ + tag = self.soup.b + fn = attribute_rule({'foo': len}) + fn(tag) + self.assertEqual(str(tag), 'baz') + + def test_callable_returns_None(self): + """ + Test attribute_rule() with a callable rule that returns + None for the attribute; NOTE(review): presumably the + attribute is dropped in that case -- confirm. + """ + tag = self.soup.b + fn = attribute_rule({'foo': lambda x: None}) + fn(tag) + self.assertEqual(str(tag), 'baz') + + def test_allow_without_attributes(self): + """ + Test that allow_without_attributes() will drop all + attributes. + """ + soup = BeautifulSoup('') + tag = soup.b + allow_without_attributes(tag) + self.assertEqual(str(tag), '') + + +class TestWhitelister(TestCase): + def test_clean_unknown_node(self): + """ + clean_unknown_node should remove a node from the parent document + """ + soup = BeautifulSoup('bazquux') + tag = soup.foo + Whitelister.clean_unknown_node('', soup.bar) + self.assertEqual(str(tag), 'quux') + + def test_clean_tag_node_cleans_nested_recognised_node(self): + """ + tags are allowed without attributes. This remains true + when tags are nested. + """ + soup = BeautifulSoup('foo') + tag = soup.b + Whitelister.clean_tag_node(tag, tag) + self.assertEqual(str(tag), 'foo') + + def test_clean_tag_node_disallows_nested_unrecognised_node(self): + """ + tags should be removed, even when nested. 
+ """ + soup = BeautifulSoup('bar') + tag = soup.b + Whitelister.clean_tag_node(tag, tag) + self.assertEqual(str(tag), 'bar') + + def test_clean_string_node_does_nothing(self): + soup = BeautifulSoup('bar') + string = soup.b.string + Whitelister.clean_string_node(string, string) + self.assertEqual(str(string), 'bar') + + def test_clean_node_does_not_change_navigable_strings(self): + soup = BeautifulSoup('bar') + string = soup.b.string + Whitelister.clean_node(string, string) + self.assertEqual(str(string), 'bar') + + def test_clean(self): + """ + Whitelister.clean should remove disallowed tags and attributes from + a string + """ + string = 'snowman Yorkshire' + cleaned_string = Whitelister.clean(string) + self.assertEqual(cleaned_string, 'snowman Yorkshire') diff --git a/wagtail/wagtailcore/whitelist.py b/wagtail/wagtailcore/whitelist.py index 508682cda..a3d377bd6 100644 --- a/wagtail/wagtailcore/whitelist.py +++ b/wagtail/wagtailcore/whitelist.py @@ -89,7 +89,10 @@ class Whitelister(object): cls.clean_string_node(doc, node) elif isinstance(node, Tag): cls.clean_tag_node(doc, node) - else: + # This branch is here in case node is a BeautifulSoup object that does + # not inherit from NavigableString or Tag. I can't find any examples + # of such a thing at the moment, so this branch is untested. + else: # pragma: no cover cls.clean_unknown_node(doc, node) @classmethod