# linkchecker/tests/checker/__init__.py
# Copyright (C) 2004-2014 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
Define standard test support classes functional for LinkChecker tests.
"""
import os
import re
import difflib
import unittest
import linkcheck.checker
import linkcheck.configuration
import linkcheck.director
import linkcheck.logger
from .. import get_file
# helper alias
get_url_from = linkcheck.checker.get_url_from
class TestLogger(linkcheck.logger._Logger):
    """
    Output logger for automatic regression tests.

    Collects per-URL log lines into ``self.result`` and, at the end of a
    run, stores a unified diff against the ``expected`` lines in
    ``self.diff`` (empty diff means the test passed).
    """

    # Keep pytest from trying to collect this class as a test case,
    # because it defines an __init__().
    __test__ = False

    LoggerName = "test"

    # Default set of log parts this logger emits.
    logparts = [
        "cachekey",
        "realurl",
        "name",
        "base",
        "info",
        "warning",
        "result",
        "url",
    ]

    def __init__(self, **kwargs):
        """
        The kwargs must have "expected" keyword with the expected logger
        output lines.
        """
        args = self.get_args(kwargs)
        args["parts"] = self.logparts
        super().__init__(**args)
        # expected output lines supplied by the test
        self.expected = args["expected"]
        # real output lines collected during the run
        self.result = []
        # unified diff between expected and real output
        self.diff = []

    def normalize(self, result_log):
        # XXX we assume that each log entry has a URL key,
        # maybe we should add an assert into log_url() to that effect?
        # Ensure that log entries are sorted by URL key:
        # - join the result_log items together
        # - split into entries (starting with a URL key)
        # - sort the entries and join together
        # - split the entries back into a list
        joined = "\n".join(result_log)
        entries = re.split(r"^url ", joined, flags=re.DOTALL | re.MULTILINE)
        keyed = sorted("url %s" % entry.strip() for entry in entries if entry)
        return "\n".join(keyed).splitlines()

    def start_output(self):
        """
        Nothing to do here.
        """

    def log_url(self, url_data):
        """
        Append logger output to self.result.
        """
        append = self.result.append
        if self.has_part("url"):
            append("url %s" % url_data.base_url)
        if self.has_part("cachekey"):
            append("cache key %s" % (url_data.cache_url if url_data.cache_url else None))
        if self.has_part("realurl"):
            append("real url %s" % url_data.url)
        if self.has_part("name") and url_data.name:
            append("name %s" % url_data.name)
        if self.has_part("base") and url_data.base_ref:
            append("baseurl %s" % url_data.base_ref)
        if self.has_part("info"):
            for info in url_data.info:
                # skip environment-dependent info lines
                if "Last modified" not in info and "is located in" not in info:
                    append("info %s" % info)
        if self.has_part("warning"):
            for _tag, warning in url_data.warnings:
                append("warning %s" % warning)
        if self.has_part("result"):
            append("valid" if url_data.valid else "error")
        if self.has_part("line"):
            append("line %s" % url_data.line)
        if self.has_part("col"):
            append("col %s" % url_data.column)
        if self.has_part("size"):
            append("size %s" % url_data.size)
        if self.has_part("parent_url"):
            append("parent_url %s" % url_data.parent_url)
        if self.has_part("page"):
            append("page %s" % url_data.page)
        if self.has_part("modified"):
            append("modified %s" % url_data.modified)
        if self.has_part("content_type"):
            append("content_type %s" % url_data.content_type)
        # note: do not append url_data.result since this is
        # platform dependent

    def end_output(self, linknumber=-1, **kwargs):
        """
        Stores differences between expected and result in self.diff.
        """
        self.expected = self.normalize(self.expected)
        self.result = self.normalize(self.result)
        self.diff = list(
            difflib.unified_diff(
                self.expected,
                self.result,
                fromfile="expected",
                tofile="result",
                lineterm="",
            )
        )
def get_file_url(filename):
    """Convert a local filename into file-URL path form: backslashes become
    forward slashes and a leading drive letter "X:" becomes "/X|"."""
    slashed = filename.replace("\\", "/")
    return re.sub("^([a-zA-Z]):", r"/\1|", slashed)
def add_fileoutput_config(config):
    """Add a file output logger writing to the null device for every
    known logger type, except the internal "test" and "failures" loggers.

    Uses the stdlib constant os.devnull ("/dev/null" on POSIX, "nul" on
    Windows) instead of a hand-rolled platform switch.
    """
    devnull = os.devnull
    for ftype in linkcheck.logger.LoggerNames:
        if ftype in ("test", "failures"):
            continue
        logger = config.logger_new(ftype, fileoutput=1, filename=devnull)
        config["fileoutput"].append(logger)
def get_test_aggregate(confargs, logargs, logger=TestLogger):
    """Initialize a test configuration object."""
    config = linkcheck.configuration.Configuration()
    config.logger_add(logger)
    config["recursionlevel"] = 1
    config["logger"] = config.logger_new(logger.LoggerName, **logargs)
    add_fileoutput_config(config)
    # uncomment for debugging
    # config.init_logging(None, debug=["all"])
    # test defaults: single-threaded, quiet status, check external links
    for option, value in (
        ("verbose", True),
        ("threads", 0),
        ("status", False),
        ("checkextern", True),
    ):
        config[option] = value
    config.update(confargs)
    config.sanitize()
    return linkcheck.director.get_aggregate(config)
class LinkCheckTest(unittest.TestCase):
    """
    Functional test class with ability to test local files.
    """

    # logger class used to compare real against expected output
    logger = TestLogger

    def setUp(self):
        """Ensure the current locale setting is the default.
        Otherwise, warnings will get translated and will break tests."""
        super().setUp()
        linkcheck.init_i18n(loc="C")

    def norm(self, url, encoding="utf-8"):
        """Helper function to norm a url."""
        return linkcheck.url.url_norm(url, encoding=encoding)[0]

    def get_attrs(self, **kwargs):
        """Return current and data directory as dictionary.
        You can augment the dict with keyword attributes."""
        attrs = {
            "curdir": get_file_url(os.getcwd()),
            "datadir": "tests/checker/data",
        }
        attrs.update(kwargs)
        return attrs

    def get_resultlines(self, filename):
        """
        Return contents of file, as list of lines without line endings,
        ignoring empty lines and lines starting with a hash sign (#).
        """
        resultfile = get_file("%s.result" % filename)
        subst = {
            "curdir": get_file_url(os.getcwd()),
            "datadir": get_file_url(get_file()),
        }
        # the webserver uses the first free port number
        if hasattr(self, "port"):
            subst["port"] = self.port
        # all result files are encoded in utf-8
        with open(resultfile, "r", encoding="utf-8") as fd:
            lines = []
            for raw in fd:
                if raw.strip() and not raw.startswith("#"):
                    lines.append(raw.rstrip("\r\n") % subst)
            return lines

    def get_url(self, filename):
        """Get URL for given filename."""
        return get_file(filename)

    def file_test(self, filename, confargs=None):
        """Check <filename> with expected result in <filename>.result."""
        url = self.get_url(filename)
        if confargs is None:
            confargs = {}
        logargs = {"expected": self.get_resultlines(filename)}
        aggregate = get_test_aggregate(confargs, logargs, logger=self.logger)
        url_data = get_url_from(url, 0, aggregate, extern=(0, 0))
        aggregate.urlqueue.put(url_data)
        linkcheck.director.check_urls(aggregate)
        logger = aggregate.config["logger"]
        if logger.diff:
            self.fail(os.linesep.join([url] + logger.diff))
        errors = logger.stats.internal_errors
        if errors:
            self.fail("%d internal errors occurred!" % errors)

    def direct(
        self,
        url,
        resultlines,
        parts=None,
        recursionlevel=0,
        confargs=None,
        url_encoding=None,
    ):
        """Check url with expected result."""
        assert isinstance(url, str), repr(url)
        if confargs is None:
            confargs = {}
        confargs["recursionlevel"] = recursionlevel
        logargs = {"expected": resultlines}
        if parts is not None:
            logargs["parts"] = parts
        aggregate = get_test_aggregate(confargs, logargs)
        # initial URL has recursion level zero
        url_reclevel = 0
        url_data = get_url_from(url, url_reclevel, aggregate, url_encoding=url_encoding)
        aggregate.urlqueue.put(url_data)
        linkcheck.director.check_urls(aggregate)
        diff = aggregate.config["logger"].diff
        if diff:
            # skip the two unified-diff header lines (---/+++)
            msg = ["Differences found testing %s" % url]
            msg.extend(line.rstrip() for line in diff[2:])
            self.fail(os.linesep.join(msg))
class MailTest(LinkCheckTest):
    """Test mailto: link checking."""

    def mail_valid(self, addr, **kwargs):
        """Test valid mail address."""
        return self.mail_test(addr, "valid", **kwargs)

    def mail_error(self, addr, **kwargs):
        """Test error mail address."""
        return self.mail_test(addr, "error", **kwargs)

    def mail_test(self, addr, result, encoding="utf-8", cache_key=None, warning=None):
        """Test mail address."""
        url = self.norm(addr, encoding=encoding)
        # the cache key defaults to the normalized URL itself
        if cache_key is None:
            cache_key = url
        expected = [
            "url %s" % url,
            "cache key %s" % cache_key,
            "real url %s" % url,
        ]
        if warning:
            expected.append("warning %s" % warning)
        expected.append(result)
        self.direct(url, expected)