Add tests for <meta name="robots" content="nofollow">

norobots.html was used for testing <meta name="robots"
content="nofollow"> in local files until [1]. This commit reinstates
local file testing and adds an http test.

Checking is reported by checker.httpurl.HttpUrl.content_allows_robots().

[1] ce733ae7 ("Don't check for robots.txt directives in local html
files.", 2014-03-19)
This commit is contained in:
Chris Mayo 2020-04-10 16:19:33 +01:00
parent d189445a8e
commit 56b8c9f7ab
2 changed files with 54 additions and 5 deletions

View file

@ -1,5 +0,0 @@
url file://%(curdir)s/%(datadir)s/norobots.html
cache key file://%(curdir)s/%(datadir)s/norobots.html
real url file://%(curdir)s/%(datadir)s/norobots.html
name %(datadir)s/norobots.html
valid

View file

@ -0,0 +1,54 @@
# Copyright (C) 2020 Chris Mayo
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
Test that <meta name="robots" content="nofollow"> is respected when using http
and ignored when checking a local file.
"""
from . import LinkCheckTest
from .httpserver import HttpServerTest
class TestHttpMetaRobots(HttpServerTest):
"""Test <meta name="robots" content="nofollow"> using http."""
def test_http_meta_robots(self):
url = "http://localhost:%d/tests/checker/data/norobots.html" % self.port
resultlines = [
"url %s" % url,
"cache key %s" % url,
"real url %s" % url,
"valid"
]
self.direct(url, resultlines, recursionlevel=1)
class TestFileMetaRobots(LinkCheckTest):
"""Test <meta name="robots" content="nofollow"> from a file."""
def test_file_meta_robots(self):
datapath = "file://%(curdir)s/%(datadir)s/%%s" % self.get_attrs()
url = datapath % "norobots.html"
dncurl = datapath % "do_not_check.html"
resultlines = [
"url %s" % url,
"cache key %s" % url,
"real url %s" % url,
"valid",
"url do_not_check.html",
"cache key %s" % dncurl,
"real url %s" % dncurl,
"name bla",
"error"
]
self.direct(url, resultlines, recursionlevel=1)