Add testing for variants of the robots meta directive

This commit is contained in:
Chris Mayo 2020-04-29 20:07:00 +01:00
parent 12a948894b
commit 1d1d9c3bde

View file

@ -16,10 +16,19 @@
"""
Test that <meta name="robots" content="nofollow"> is respected when using http
and ignored when checking a local file.
Also test that different values of the content attribute are correctly matched.
"""
import unittest
import linkcheck.configuration
import linkcheck.director
from linkcheck.htmlutil.htmlsoup import make_soup
from . import get_url_from
from . import LinkCheckTest
from .httpserver import HttpServerTest
class TestHttpMetaRobots(HttpServerTest):
"""Test <meta name="robots" content="nofollow"> using http."""
@ -33,6 +42,7 @@ class TestHttpMetaRobots(HttpServerTest):
]
self.direct(url, resultlines, recursionlevel=1)
class TestFileMetaRobots(LinkCheckTest):
"""Test <meta name="robots" content="nofollow"> from a file."""
@ -52,3 +62,23 @@ class TestFileMetaRobots(LinkCheckTest):
"error"
]
self.direct(url, resultlines, recursionlevel=1)
class TestMetaRobotsVariants(unittest.TestCase):
    """Check content_allows_robots() against several robots meta values."""

    def test_nofollow_variants(self):
        """Assert the expected outcome for each robots meta tag, in order.

        One URL object is built once and reused; before every check its
        ``soup`` attribute is replaced with freshly parsed markup.
        """
        aggregate = linkcheck.director.get_aggregate(
            linkcheck.configuration.Configuration()
        )
        url_data = get_url_from("http://example.org", 0, aggregate)
        url_data.content_type = "text/html"

        # Each entry: (markup, whether content_allows_robots() should be truthy).
        cases = (
            ('<meta name="robots" content="nofollow">', False),
            (
                '<meta name="robots" content="nocache, Nofollow, noimageindex">',
                False,
            ),
            ('<meta name="robots" content="noindex, follow">', True),
        )
        for markup, allowed in cases:
            url_data.soup = make_soup(markup)
            verify = self.assertTrue if allowed else self.assertFalse
            verify(url_data.content_allows_robots())