From 56b8c9f7abe11c946f3d6a1a1aba9bbfb8147901 Mon Sep 17 00:00:00 2001
From: Chris Mayo
Date: Fri, 10 Apr 2020 16:19:33 +0100
Subject: [PATCH] Add tests for <meta name="robots" content="nofollow">

norobots.html was used for testing in local files until [1]. This commit
reinstates local file testing and adds an http test.

Checking is reported by checker.httpurl.HttpUrl.content_allows_robots().

[1] ce733ae7 ("Don't check for robots.txt directives in local html files.",
    2014-03-19)
---
 tests/checker/data/norobots.html.result     |  5 --
 tests/checker/test_content_allows_robots.py | 54 +++++++++++++++++++++
 2 files changed, 54 insertions(+), 5 deletions(-)
 delete mode 100644 tests/checker/data/norobots.html.result
 create mode 100644 tests/checker/test_content_allows_robots.py

diff --git a/tests/checker/data/norobots.html.result b/tests/checker/data/norobots.html.result
deleted file mode 100644
index fbbf1e70..00000000
--- a/tests/checker/data/norobots.html.result
+++ /dev/null
@@ -1,5 +0,0 @@
-url file://%(curdir)s/%(datadir)s/norobots.html
-cache key file://%(curdir)s/%(datadir)s/norobots.html
-real url file://%(curdir)s/%(datadir)s/norobots.html
-name %(datadir)s/norobots.html
-valid
diff --git a/tests/checker/test_content_allows_robots.py b/tests/checker/test_content_allows_robots.py
new file mode 100644
index 00000000..605ea1e2
--- /dev/null
+++ b/tests/checker/test_content_allows_robots.py
@@ -0,0 +1,54 @@
+# Copyright (C) 2020 Chris Mayo
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+"""
+Test that <meta name="robots" content="nofollow"> is respected when using http
+and ignored when checking a local file.
+"""
+from . import LinkCheckTest
+from .httpserver import HttpServerTest
+
+class TestHttpMetaRobots(HttpServerTest):
+    """Test using http: only norobots.html itself should be reported."""
+
+    def test_http_meta_robots(self):
+        url = "http://localhost:%d/tests/checker/data/norobots.html" % self.port
+        resultlines = [
+            "url %s" % url,
+            "cache key %s" % url,
+            "real url %s" % url,
+            "valid"
+        ]
+        self.direct(url, resultlines, recursionlevel=1)
+
+class TestFileMetaRobots(LinkCheckTest):
+    """Test from a file: do_not_check.html should be reported too."""
+
+    def test_file_meta_robots(self):
+        datapath = "file://%(curdir)s/%(datadir)s/%%s" % self.get_attrs()
+        url = datapath % "norobots.html"
+        dncurl = datapath % "do_not_check.html"
+        resultlines = [
+            "url %s" % url,
+            "cache key %s" % url,
+            "real url %s" % url,
+            "valid",
+            "url do_not_check.html",
+            "cache key %s" % dncurl,
+            "real url %s" % dncurl,
+            "name bla",
+            "error"
+        ]
+        self.direct(url, resultlines, recursionlevel=1)