mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-25 06:33:43 +00:00
Add testing for variants of the robots meta directive
This commit is contained in:
parent
12a948894b
commit
1d1d9c3bde
1 changed file with 30 additions and 0 deletions
|
|
@@ -16,10 +16,19 @@
|
|||
"""
|
||||
Test that <meta name="robots" content="nofollow"> is respected when using http
|
||||
and ignored when checking a local file.
|
||||
Also test that different values of the content attribute are correctly matched.
|
||||
"""
|
||||
import unittest
|
||||
|
||||
import linkcheck.configuration
|
||||
import linkcheck.director
|
||||
from linkcheck.htmlutil.htmlsoup import make_soup
|
||||
from . import get_url_from
|
||||
|
||||
from . import LinkCheckTest
|
||||
from .httpserver import HttpServerTest
|
||||
|
||||
|
||||
class TestHttpMetaRobots(HttpServerTest):
|
||||
"""Test <meta name="robots" content="nofollow"> using http."""
|
||||
|
||||
|
|
@@ -33,6 +42,7 @@ class TestHttpMetaRobots(HttpServerTest):
|
|||
]
|
||||
self.direct(url, resultlines, recursionlevel=1)
|
||||
|
||||
|
||||
class TestFileMetaRobots(LinkCheckTest):
|
||||
"""Test <meta name="robots" content="nofollow"> from a file."""
|
||||
|
||||
|
|
@@ -52,3 +62,23 @@ class TestFileMetaRobots(LinkCheckTest):
|
|||
"error"
|
||||
]
|
||||
self.direct(url, resultlines, recursionlevel=1)
|
||||
|
||||
|
||||
class TestMetaRobotsVariants(unittest.TestCase):
    """Test different values of the robots meta directive content attribute."""

    def test_nofollow_variants(self):
        """Check content_allows_robots() against several robots meta tags.

        A single url_data object for "http://example.org" is built once and
        its ``soup`` is replaced with each markup variant in turn.  The
        result of ``content_allows_robots()`` must be falsy for the variants
        that contain a ``nofollow`` token (in any letter case, alone or in a
        comma-separated list) and truthy for ``noindex, follow``.
        """
        # (markup, expected truthiness of content_allows_robots()) pairs.
        variants = (
            ('<meta name="robots" content="nofollow">', False),
            (
                '<meta name="robots" '
                'content="nocache, Nofollow, noimageindex">',
                False,
            ),
            ('<meta name="robots" content="noindex, follow">', True),
        )

        config = linkcheck.configuration.Configuration()
        aggregate = linkcheck.director.get_aggregate(config)
        url_data = get_url_from("http://example.org", 0, aggregate)
        url_data.content_type = "text/html"

        for markup, allowed in variants:
            # subTest keeps the remaining variants running after a failure
            # and names the offending markup in the test report.
            with self.subTest(markup=markup):
                url_data.soup = make_soup(markup)
                check = self.assertTrue if allowed else self.assertFalse
                check(url_data.content_allows_robots(), markup)
|
||||
|
|
|
|||
Loading…
Reference in a new issue