Run black on tests/

This commit is contained in:
Chris Mayo 2020-05-28 20:29:13 +01:00
parent 4d2449bb13
commit 165c51aeea
47 changed files with 698 additions and 528 deletions

View file

@ -46,10 +46,9 @@ def run_checked(cmd, ret_ok=(0,), **kwargs):
return retcode
def run_silent(cmd):
"""Run given command without output."""
null = open(os.name == 'nt' and ':NUL' or "/dev/null", 'w')
null = open(os.name == "nt" and ":NUL" or "/dev/null", "w")
try:
return run(cmd, stdout=null, stderr=subprocess.STDOUT)
finally:
@ -58,13 +57,16 @@ def run_silent(cmd):
def _need_func(testfunc, name):
    """Build a decorator that skips the decorated test when testfunc() is falsy.

    testfunc is called with no arguments every time the decorated test
    runs; name is interpolated into the skip message.
    """

    def decorator(func):
        @wraps(func)
        def guarded(*args, **kwargs):
            available = testfunc()
            if not available:
                pytest.skip("%s is not available" % name)
            return func(*args, **kwargs)

        return guarded

    return decorator
@ -80,6 +82,7 @@ def has_network():
pass
return False
need_network = _need_func(has_network, "network")
@ -88,6 +91,7 @@ def has_msgfmt():
"""Test if msgfmt is available."""
return run_silent(["msgfmt", "-V"]) == 0
need_msgfmt = _need_func(has_msgfmt, "msgfmt")
@ -96,6 +100,7 @@ def has_posix():
"""Test if this is a POSIX system."""
return os.name == "posix"
need_posix = _need_func(has_posix, "POSIX system")
@ -104,6 +109,7 @@ def has_windows():
"""Test if this is a Windows system."""
return os.name == "nt"
need_windows = _need_func(has_windows, "Windows system")
@ -112,6 +118,7 @@ def has_linux():
"""Test if this is a Linux system."""
return sys.platform.startswith("linux")
need_linux = _need_func(has_linux, "Linux system")
@ -130,6 +137,7 @@ def has_clamav():
pass
return False
need_clamav = _need_func(has_clamav, "ClamAV")
@ -144,6 +152,7 @@ def has_proxy():
except Exception:
return False
need_proxy = _need_func(has_proxy, "proxy")
@ -152,10 +161,12 @@ def has_pyftpdlib():
"""Test if pyftpdlib is available."""
try:
import pyftpdlib
return True
except ImportError:
return False
need_pyftpdlib = _need_func(has_pyftpdlib, "pyftpdlib")
@ -164,16 +175,19 @@ def has_biplist():
"""Test if biplist is available."""
try:
import biplist
return True
except ImportError:
return False
need_biplist = _need_func(has_biplist, "biplist")
@lru_cache(1)
def has_newsserver(server):
import nntplib
try:
nntp = nntplib.NNTP(server, usenetrc=False)
nntp.quit()
@ -184,53 +198,62 @@ def has_newsserver(server):
def need_newsserver(server):
    """Decorator skipping test if newsserver is not available.

    has_newsserver(server) is consulted each time the decorated test is
    called, not at decoration time.
    """

    def check_func(func):
        # functools.wraps copies __name__, __doc__, __module__ and sets
        # __wrapped__, in line with _need_func; the previous code only
        # copied __name__ by hand.
        @wraps(func)
        def newfunc(*args, **kwargs):
            if not has_newsserver(server):
                pytest.skip("Newsserver `%s' is not available" % server)
            return func(*args, **kwargs)

        return newfunc

    return check_func
@lru_cache(1)
def has_x11():
    """Report whether the DISPLAY environment variable is set.

    The answer is memoized by lru_cache, so the environment is only
    inspected on the first call.
    """
    display = os.getenv("DISPLAY")
    return display is not None
need_x11 = _need_func(has_x11, 'X11')
need_x11 = _need_func(has_x11, "X11")
@lru_cache(1)
def has_word():
"""Test if Word is available."""
from linkcheck.plugins import parseword
return parseword.has_word()
need_word = _need_func(has_word, 'Word')
need_word = _need_func(has_word, "Word")
@lru_cache(1)
def has_pdflib():
from linkcheck.plugins import parsepdf
return parsepdf.has_pdflib
need_pdflib = _need_func(has_pdflib, 'pdflib')
need_pdflib = _need_func(has_pdflib, "pdflib")
@contextmanager
def _limit_time(seconds):
    """Raise LinkCheckerInterrupt in the with-body after `seconds` seconds.

    On POSIX a SIGALRM handler is installed and an alarm is armed for the
    duration of the with-block. On any other platform the body simply
    runs and no limit is enforced.
    """
    if os.name != "posix":
        # Alarm-based limiting is only set up on POSIX.
        yield
        return

    def signal_handler(signum, frame):
        raise LinkCheckerInterrupt("timed out")

    old_handler = signal.getsignal(signal.SIGALRM)
    signal.signal(signal.SIGALRM, signal_handler)
    signal.alarm(seconds)
    try:
        yield
    finally:
        # Always disarm the alarm and put the previous handler back, even
        # when the body raises; otherwise a still-pending alarm could fire
        # later in unrelated code.
        signal.alarm(0)
        # getsignal() returns None when the previous handler was not
        # installed from Python; there is nothing to restore then.
        if old_handler is not None:
            signal.signal(signal.SIGALRM, old_handler)
@ -238,6 +261,7 @@ def _limit_time(seconds):
def limit_time(seconds, skip=False):
"""Limit test time to the given number of seconds, else fail or skip."""
def run_limited(func):
def new_func(*args, **kwargs):
try:
@ -247,8 +271,10 @@ def limit_time(seconds, skip=False):
if skip:
pytest.skip("time limit of %d seconds exceeded" % seconds)
assert False, msg
new_func.__name__ = func.__name__
return new_func
return run_limited
@ -262,7 +288,7 @@ def get_file(filename=None):
return directory
if __name__ == '__main__':
if __name__ == "__main__":
print("has clamav", has_clamav())
print("has network", has_network())
print("has msgfmt", has_msgfmt())

View file

@ -20,21 +20,18 @@ from collections import namedtuple
from linkcheck.cache.results import ResultCache
from linkcheck.cache.urlqueue import Empty, NUM_PUTS_CLEANUP, UrlQueue
UrlData = namedtuple('UrlData', 'url cache_url aggregate has_result')
Aggregate = namedtuple('Aggregate', 'result_cache')
UrlData = namedtuple("UrlData", "url cache_url aggregate has_result")
Aggregate = namedtuple("Aggregate", "result_cache")
class TestUrlQueue(unittest.TestCase):
def setUp(self):
self.result_cache = ResultCache()
self.urlqueue = UrlQueue()
self.urldata1 = UrlData(
url="Foo",
cache_url="Foo",
aggregate=Aggregate(
result_cache=self.result_cache,
),
aggregate=Aggregate(result_cache=self.result_cache,),
has_result=True,
)
@ -72,9 +69,7 @@ class TestUrlQueue(unittest.TestCase):
the item and it can be get only once
"""
self.urlqueue.put(self.urldata1)
cached_item = (
self.result_cache.get_result(self.urldata1)
)
cached_item = self.result_cache.get_result(self.urldata1)
self.assertEqual(cached_item, None)
self.assertEqual(self.urlqueue.get(), self.urldata1)
with self.assertRaises(Empty):
@ -89,9 +84,7 @@ class TestUrlQueue(unittest.TestCase):
urldata = UrlData(
url="Bar",
cache_url="Bar",
aggregate=Aggregate(
result_cache=self.result_cache,
),
aggregate=Aggregate(result_cache=self.result_cache,),
has_result=False,
)
self.urlqueue.put(urldata)
@ -109,9 +102,7 @@ class TestUrlQueue(unittest.TestCase):
urldata = UrlData(
url="Bar",
cache_url="Bar",
aggregate=Aggregate(
result_cache=self.result_cache,
),
aggregate=Aggregate(result_cache=self.result_cache,),
has_result=True,
)
self.urlqueue.put(urldata)
@ -129,9 +120,7 @@ class TestUrlQueue(unittest.TestCase):
urldata = UrlData(
url="Bar",
cache_url="Foo",
aggregate=Aggregate(
result_cache=self.result_cache,
),
aggregate=Aggregate(result_cache=self.result_cache,),
has_result=True,
)
self.urlqueue.put(urldata)
@ -152,9 +141,7 @@ class TestUrlQueue(unittest.TestCase):
UrlData(
url="Bar",
cache_url="Bar address %s" % i,
aggregate=Aggregate(
result_cache=self.result_cache,
),
aggregate=Aggregate(result_cache=self.result_cache,),
has_result=False,
),
)
@ -162,9 +149,7 @@ class TestUrlQueue(unittest.TestCase):
urldata = UrlData(
url="Bar",
cache_url="Bar address",
aggregate=Aggregate(
result_cache=self.result_cache,
),
aggregate=Aggregate(result_cache=self.result_cache,),
has_result=False,
)
self.result_cache.add_result("Bar address 2", "asdf")

View file

@ -35,21 +35,21 @@ class TestLogger(linkcheck.logger._Logger):
"""
Output logger for automatic regression tests.
"""
# don't attempt to collect this class because it has an __init__()
__test__ = False
LoggerName = 'test'
LoggerName = "test"
logparts = [
'cachekey',
'realurl',
'name',
'base',
'info',
'warning',
'result',
'url',
"cachekey",
"realurl",
"name",
"base",
"info",
"warning",
"result",
"url",
]
def __init__(self, **kwargs):
@ -58,10 +58,10 @@ class TestLogger(linkcheck.logger._Logger):
output lines.
"""
args = self.get_args(kwargs)
args['parts'] = self.logparts
args["parts"] = self.logparts
super(TestLogger, self).__init__(**args)
# list of expected output lines
self.expected = args['expected']
self.expected = args["expected"]
# list of real output lines
self.result = []
# diff between expected and real output
@ -74,11 +74,19 @@ class TestLogger(linkcheck.logger._Logger):
# - split into entries (starting with a URL key)
# - sort the entries and join together
# - split the entries back into a list
return '\n'.join(
sorted(['url %s' % x.strip() for x in
re.split(r'^url .*?', '\n'.join(result_log),
flags=re.DOTALL | re.MULTILINE)
if x])).splitlines()
return "\n".join(
sorted(
[
"url %s" % x.strip()
for x in re.split(
r"^url .*?",
"\n".join(result_log),
flags=re.DOTALL | re.MULTILINE,
)
if x
]
)
).splitlines()
def start_output(self):
"""
@ -90,42 +98,44 @@ class TestLogger(linkcheck.logger._Logger):
"""
Append logger output to self.result.
"""
if self.has_part('url'):
if self.has_part("url"):
url = "url %s" % url_data.base_url
self.result.append(url)
if self.has_part('cachekey'):
if self.has_part("cachekey"):
cache_key = url_data.cache_url if url_data.cache_url else None
self.result.append("cache key %s" % cache_key)
if self.has_part('realurl'):
if self.has_part("realurl"):
self.result.append("real url %s" % url_data.url)
if self.has_part('name') and url_data.name:
if self.has_part("name") and url_data.name:
self.result.append("name %s" % url_data.name)
if self.has_part('base') and url_data.base_ref:
if self.has_part("base") and url_data.base_ref:
self.result.append("baseurl %s" % url_data.base_ref)
if self.has_part('info'):
if self.has_part("info"):
for info in url_data.info:
if "Last modified" not in info and \
"is located in" not in info and \
"Using proxy" not in info:
if (
"Last modified" not in info
and "is located in" not in info
and "Using proxy" not in info
):
self.result.append("info %s" % info)
if self.has_part('warning'):
if self.has_part("warning"):
for tag, warning in url_data.warnings:
self.result.append("warning %s" % warning)
if self.has_part('result'):
if self.has_part("result"):
self.result.append("valid" if url_data.valid else "error")
if self.has_part('line'):
if self.has_part("line"):
self.result.append("line %s" % url_data.line)
if self.has_part('col'):
if self.has_part("col"):
self.result.append("col %s" % url_data.column)
if self.has_part('size'):
if self.has_part("size"):
self.result.append("size %s" % url_data.size)
if self.has_part('parent_url'):
if self.has_part("parent_url"):
self.result.append("parent_url %s" % url_data.parent_url)
if self.has_part('page'):
if self.has_part("page"):
self.result.append("page %s" % url_data.page)
if self.has_part('modified'):
if self.has_part("modified"):
self.result.append("modified %s" % url_data.modified)
if self.has_part('content_type'):
if self.has_part("content_type"):
self.result.append("content_type %s" % url_data.content_type)
# note: do not append url_data.result since this is
# platform dependent
@ -136,10 +146,15 @@ class TestLogger(linkcheck.logger._Logger):
"""
self.expected = self.normalize(self.expected)
self.result = self.normalize(self.result)
self.diff = list(difflib.unified_diff(self.expected, self.result,
fromfile="expected",
tofile="result",
lineterm=""))
self.diff = list(
difflib.unified_diff(
self.expected,
self.result,
fromfile="expected",
tofile="result",
lineterm="",
)
)
def get_file_url(filename):
@ -147,31 +162,31 @@ def get_file_url(filename):
def add_fileoutput_config(config):
    """Append a file-output logger for each logger type to config["fileoutput"].

    Each logger is created with the platform's null device as its
    filename.  The "test" and "blacklist" logger types are skipped, and
    nothing is added when os.name is neither "posix" nor "nt".
    """
    null_devices = {"posix": "/dev/null", "nt": "NUL"}
    devnull = null_devices.get(os.name)
    if devnull is None:
        return
    skipped = ("test", "blacklist")
    for ftype in linkcheck.logger.LoggerNames:
        if ftype not in skipped:
            logger = config.logger_new(ftype, fileoutput=1, filename=devnull)
            config["fileoutput"].append(logger)
def get_test_aggregate(confargs, logargs, logger=TestLogger):
"""Initialize a test configuration object."""
config = linkcheck.configuration.Configuration()
config.logger_add(logger)
config['recursionlevel'] = 1
config['logger'] = config.logger_new(logger.LoggerName, **logargs)
config["recursionlevel"] = 1
config["logger"] = config.logger_new(logger.LoggerName, **logargs)
add_fileoutput_config(config)
# uncomment for debugging
#config.init_logging(None, debug=["all"])
# config.init_logging(None, debug=["all"])
config["verbose"] = True
config['threads'] = 0
config['status'] = False
config["threads"] = 0
config["status"] = False
config["checkextern"] = True
config.update(confargs)
config.sanitize()
@ -182,13 +197,14 @@ class LinkCheckTest(unittest.TestCase):
"""
Functional test class with ability to test local files.
"""
logger = TestLogger
def setUp(self):
"""Ensure the current locale setting is the default.
Otherwise, warnings will get translated and will break tests."""
super(LinkCheckTest, self).setUp()
linkcheck.init_i18n(loc='C')
linkcheck.init_i18n(loc="C")
def norm(self, url, encoding="utf-8"):
"""Helper function to norm a url."""
@ -198,8 +214,8 @@ class LinkCheckTest(unittest.TestCase):
"""Return current and data directory as dictionary.
You can augment the dict with keyword attributes."""
d = {
'curdir': get_file_url(os.getcwd()),
'datadir': "tests/checker/data",
"curdir": get_file_url(os.getcwd()),
"datadir": "tests/checker/data",
}
d.update(kwargs)
return d
@ -210,16 +226,20 @@ class LinkCheckTest(unittest.TestCase):
ignoring empty lines and lines starting with a hash sign (#).
"""
resultfile = get_file("%s.result" % filename)
d = {'curdir': get_file_url(os.getcwd()),
'datadir': get_file_url(get_file()),
}
d = {
"curdir": get_file_url(os.getcwd()),
"datadir": get_file_url(get_file()),
}
# the webserver uses the first free port number
if hasattr(self, 'port'):
d['port'] = self.port
if hasattr(self, "port"):
d["port"] = self.port
# all result files are encoded in utf-8
with codecs.open(resultfile, "r", "utf-8") as f:
return [line.rstrip('\r\n') % d for line in f
if line.strip() and not line.startswith('#')]
return [
line.rstrip("\r\n") % d
for line in f
if line.strip() and not line.startswith("#")
]
def get_url(self, filename):
"""Get URL for given filename."""
@ -230,38 +250,44 @@ class LinkCheckTest(unittest.TestCase):
url = self.get_url(filename)
if confargs is None:
confargs = {}
logargs = {'expected': self.get_resultlines(filename)}
logargs = {"expected": self.get_resultlines(filename)}
aggregate = get_test_aggregate(confargs, logargs, logger=self.logger)
url_data = get_url_from(url, 0, aggregate, extern=(0, 0))
aggregate.urlqueue.put(url_data)
linkcheck.director.check_urls(aggregate)
logger = aggregate.config['logger']
logger = aggregate.config["logger"]
diff = logger.diff
if diff:
msg = os.linesep.join([url] + diff)
self.fail(msg)
if logger.stats.internal_errors:
self.fail("%d internal errors occurred!"
% logger.stats.internal_errors)
self.fail("%d internal errors occurred!" % logger.stats.internal_errors)
def direct(self, url, resultlines, parts=None, recursionlevel=0,
confargs=None, url_encoding=None):
def direct(
self,
url,
resultlines,
parts=None,
recursionlevel=0,
confargs=None,
url_encoding=None,
):
"""Check url with expected result."""
assert isinstance(url, str), repr(url)
if confargs is None:
confargs = {'recursionlevel': recursionlevel}
confargs = {"recursionlevel": recursionlevel}
else:
confargs['recursionlevel'] = recursionlevel
logargs = {'expected': resultlines}
confargs["recursionlevel"] = recursionlevel
logargs = {"expected": resultlines}
if parts is not None:
logargs['parts'] = parts
logargs["parts"] = parts
aggregate = get_test_aggregate(confargs, logargs)
# initial URL has recursion level zero
url_reclevel = 0
url_data = get_url_from(url, url_reclevel, aggregate, url_encoding=url_encoding)
aggregate.urlqueue.put(url_data)
linkcheck.director.check_urls(aggregate)
diff = aggregate.config['logger'].diff
diff = aggregate.config["logger"].diff
if diff:
l = ["Differences found testing %s" % url]
l.extend(x.rstrip() for x in diff[2:])

View file

@ -26,13 +26,14 @@ from . import LinkCheckTest
TIMEOUT = 5
class FtpServerTest(LinkCheckTest):
"""Start/stop an FTP server that can be used for testing."""
def __init__(self, methodName='runTest'):
def __init__(self, methodName="runTest"):
"""Init test class and store default ftp server port."""
super(FtpServerTest, self).__init__(methodName=methodName)
self.host = 'localhost'
self.host = "localhost"
self.port = None
def setUp(self):
@ -62,7 +63,7 @@ def start_server(host, port):
pytest.skip("pyftpdlib is not available")
return
authorizer = DummyAuthorizer()
datadir = os.path.join(os.path.dirname(__file__), 'data')
datadir = os.path.join(os.path.dirname(__file__), "data")
authorizer.add_anonymous(datadir)
# Instantiate FTP handler class

View file

@ -49,10 +49,11 @@ class StoppableHttpRequestHandler(SimpleHTTPRequestHandler):
"""
pass
# serve .xhtml files as application/xhtml+xml
StoppableHttpRequestHandler.extensions_map.update({
'.xhtml': 'application/xhtml+xml',
})
StoppableHttpRequestHandler.extensions_map.update(
{".xhtml": "application/xhtml+xml",}
)
class StoppableHttpServer(HTTPServer):
@ -79,15 +80,15 @@ class NoQueryHttpRequestHandler(StoppableHttpRequestHandler):
"""
Remove everything after a question mark.
"""
i = self.path.find('?')
i = self.path.find("?")
if i != -1:
self.path = self.path[:i]
def get_status(self):
    """Return the HTTP status code named by the last segment of self.path.

    The text after the final "/" is parsed as an integer; 500 is
    returned instead when that number is not a key of self.responses.
    """
    _prefix, last_segment = self.path.rsplit("/", 1)
    code = int(last_segment)
    return code if code in self.responses else 500
def do_GET(self):
@ -99,7 +100,7 @@ class NoQueryHttpRequestHandler(StoppableHttpRequestHandler):
status = self.get_status()
self.send_response(status)
self.end_headers()
if status >= 200 and status not in (204, 304):
if status >= 200 and status not in (204, 304):
self.wfile.write(b"testcontent")
else:
super(NoQueryHttpRequestHandler, self).do_GET()
@ -131,9 +132,9 @@ class NoQueryHttpRequestHandler(StoppableHttpRequestHandler):
list = ["example1.txt", "example2.html", "example3"]
for name in list:
displayname = linkname = name
list_item = (
'<li><a href="%s">%s</a>\n'
% (urllib.parse.quote(linkname), html.escape(displayname))
list_item = '<li><a href="%s">%s</a>\n' % (
urllib.parse.quote(linkname),
html.escape(displayname),
)
f.write(list_item.encode())
f.write(b"</ul>\n<hr>\n</body>\n</html>\n")
@ -152,7 +153,7 @@ class HttpServerTest(LinkCheckTest):
Start/stop an HTTP server that can be used for testing.
"""
def __init__(self, methodName='runTest'):
def __init__(self, methodName="runTest"):
"""
Init test class and store default http server port.
"""
@ -195,13 +196,16 @@ class HttpsServerTest(HttpServerTest):
def start_server(handler, https=False):
"""Start an HTTP server thread and return its port number."""
server_address = ('localhost', 0)
server_address = ("localhost", 0)
handler.protocol_version = "HTTP/1.0"
httpd = StoppableHttpServer(server_address, handler)
if https:
httpd.socket = ssl.wrap_socket(httpd.socket,
httpd.socket = ssl.wrap_socket(
httpd.socket,
keyfile=get_file("https_key.pem"),
certfile=get_file("https_cert.pem"), server_side=True)
certfile=get_file("https_cert.pem"),
server_side=True,
)
port = httpd.server_port
t = threading.Thread(None, httpd.serve_forever)
t.start()
@ -209,8 +213,9 @@ def start_server(handler, https=False):
while True:
try:
if https:
conn = HTTPSConnection("localhost:%d" % port,
context=ssl._create_unverified_context())
conn = HTTPSConnection(
"localhost:%d" % port, context=ssl._create_unverified_context()
)
else:
conn = HTTPConnection("localhost:%d" % port)
conn.request("GET", "/")
@ -224,8 +229,9 @@ def start_server(handler, https=False):
def stop_server(port, https=False):
"""Stop an HTTP server thread."""
if https:
conn = HTTPSConnection("localhost:%d" % port,
context=ssl._create_unverified_context())
conn = HTTPSConnection(
"localhost:%d" % port, context=ssl._create_unverified_context()
)
else:
conn = HTTPConnection("localhost:%d" % port)
conn.request("QUIT", "/")
@ -298,6 +304,7 @@ class CookieRedirectHttpRequestHandler(NoQueryHttpRequestHandler):
else:
super(CookieRedirectHttpRequestHandler, self).do_HEAD()
class CGIHandler(CGIHTTPRequestHandler, StoppableHttpRequestHandler):
cgi_path = "/tests/checker/cgi-bin/"
@ -305,7 +312,9 @@ class CGIHandler(CGIHTTPRequestHandler, StoppableHttpRequestHandler):
# CGIHTTPRequestHandler.is_cgi() can only handle a single-level path
# override so that we can store scripts under /tests/checker
if CGIHandler.cgi_path in self.path:
self.cgi_info = (CGIHandler.cgi_path,
os.path.relpath(self.path, CGIHandler.cgi_path))
self.cgi_info = (
CGIHandler.cgi_path,
os.path.relpath(self.path, CGIHandler.cgi_path),
)
return True
return False

View file

@ -25,13 +25,14 @@ from . import LinkCheckTest
TIMEOUT = 5
class TelnetServerTest(LinkCheckTest):
"""Start/stop a Telnet server that can be used for testing."""
def __init__(self, methodName='runTest'):
def __init__(self, methodName="runTest"):
"""Init test class and store default ftp server port."""
super(TelnetServerTest, self).__init__(methodName=methodName)
self.host = 'localhost'
self.host = "localhost"
self.port = None
self.stop_event = threading.Event()
self.server_thread = None
@ -62,9 +63,11 @@ class TelnetServerTest(LinkCheckTest):
def start_server(host, port, stop_event):
# Instantiate Telnet server class and listen to host:port
clients = []
def on_connect(client):
clients.append(client)
client.send("Telnet test server\nlogin: ")
server = miniboa.TelnetServer(port=port, address=host, on_connect=on_connect)
port = server.server_socket.getsockname()[1]
t = threading.Thread(None, serve_forever, args=(server, clients, stop_event))
@ -97,7 +100,7 @@ def serve_forever(server, clients, stop_event):
def handle_cmd(client):
"""Handle telnet clients."""
msg = client.get_command().lower()
if msg == 'exit':
if msg == "exit":
client.active = False
else:
client.send("Password: ")

View file

@ -24,22 +24,23 @@ from . import TestLogger
bs_has_linenos = BeautifulSoup("<a>", "html.parser").a.sourceline is not None
class AllPartsLogger(TestLogger):
logparts = [
'cachekey',
'realurl',
'name',
'base',
'info',
'warning',
'result',
'url',
'line',
'col',
'size',
'parent_url',
'page',
'content_type',
"cachekey",
"realurl",
"name",
"base",
"info",
"warning",
"result",
"url",
"line",
"col",
"size",
"parent_url",
"page",
"content_type",
]
@ -47,14 +48,15 @@ class TestAllParts(LinkCheckTest):
"""
Test that all parts of logger are working properly.
"""
logger = AllPartsLogger
@pytest.mark.skipif(bs_has_linenos,
reason="Beautiful Soup supports line numbers")
@pytest.mark.skipif(bs_has_linenos, reason="Beautiful Soup supports line numbers")
def test_all_parts(self):
self.file_test("all_parts.html")
@pytest.mark.skipif(not bs_has_linenos,
reason="Beautiful Soup does not support line numbers")
@pytest.mark.skipif(
not bs_has_linenos, reason="Beautiful Soup does not support line numbers"
)
def test_all_parts_linenos(self):
self.file_test("all_parts_linenos.html")

View file

@ -38,4 +38,3 @@ class TestAnchor(LinkCheckTest):
"valid",
]
self.direct(urlanchor, resultlines, confargs=confargs)

View file

@ -38,7 +38,7 @@ class TestHttpMetaRobots(HttpServerTest):
"url %s" % url,
"cache key %s" % url,
"real url %s" % url,
"valid"
"valid",
]
self.direct(url, resultlines, recursionlevel=1)
@ -59,7 +59,7 @@ class TestFileMetaRobots(LinkCheckTest):
"cache key %s" % dncurl,
"real url %s" % dncurl,
"name bla",
"error"
"error",
]
self.direct(url, resultlines, recursionlevel=1)
@ -77,7 +77,9 @@ class TestMetaRobotsVariants(unittest.TestCase):
url_data.soup = make_soup('<meta name="robots" content="nofollow">')
self.assertFalse(url_data.content_allows_robots())
url_data.soup = make_soup('<meta name="robots" content="nocache, Nofollow, noimageindex">')
url_data.soup = make_soup(
'<meta name="robots" content="nocache, Nofollow, noimageindex">'
)
self.assertFalse(url_data.content_allows_robots())
url_data.soup = make_soup('<meta name="robots" content="noindex, follow">')

View file

@ -29,7 +29,7 @@ class TestError(LinkCheckTest):
# Unrecognized scheme
url = "hutzli:"
attrs = self.get_attrs(url=url)
attrs['nurl'] = self.norm("file://%(curdir)s/%(url)s" % attrs)
attrs["nurl"] = self.norm("file://%(curdir)s/%(url)s" % attrs)
resultlines = [
"url file://%(curdir)s/%(url)s" % attrs,
"cache key %(nurl)s" % attrs,
@ -42,7 +42,7 @@ class TestError(LinkCheckTest):
# invalid scheme chars
url = "äöü:"
attrs = self.get_attrs(url=url)
attrs['nurl'] = self.norm("file://%(curdir)s/%(url)s" % attrs)
attrs["nurl"] = self.norm("file://%(curdir)s/%(url)s" % attrs)
resultlines = [
"url file://%(curdir)s/%(url)s" % attrs,
"cache key %(nurl)s" % attrs,
@ -56,7 +56,7 @@ class TestError(LinkCheckTest):
# missing scheme altogether
url = "äöü"
attrs = self.get_attrs(url=url)
attrs['nurl'] = self.norm("file://%(curdir)s/%(url)s" % attrs)
attrs["nurl"] = self.norm("file://%(curdir)s/%(url)s" % attrs)
resultlines = [
"url file://%(curdir)s/%(url)s" % attrs,
"cache key %(nurl)s" % attrs,
@ -70,7 +70,7 @@ class TestError(LinkCheckTest):
# really fucked up
url = "@³²¼][½ ³@] ¬½"
attrs = self.get_attrs(url=url)
attrs['nurl'] = self.norm("file://%(curdir)s/%(url)s" % attrs)
attrs["nurl"] = self.norm("file://%(curdir)s/%(url)s" % attrs)
resultlines = [
"url file://%(curdir)s/%(url)s" % attrs,
"cache key %(nurl)s" % attrs,

View file

@ -30,10 +30,10 @@ def unzip(filename, targetdir):
"""Unzip given zipfile into targetdir."""
zf = zipfile.ZipFile(filename)
for name in zf.namelist():
if name.endswith('/'):
if name.endswith("/"):
os.mkdir(os.path.join(targetdir, name), 0o700)
else:
outfile = open(os.path.join(targetdir, name), 'wb')
outfile = open(os.path.join(targetdir, name), "wb")
try:
outfile.write(zf.read(name))
finally:
@ -91,7 +91,7 @@ class TestFile(LinkCheckTest):
def test_directory_listing(self):
# unpack non-unicode filename which cannot be stored
# in the SF subversion repository
if os.name != 'posix' or sys.platform != 'linux2':
if os.name != "posix" or sys.platform != "linux2":
return
dirname = get_file("dir")
if not os.path.isdir(dirname):
@ -114,7 +114,7 @@ class TestFile(LinkCheckTest):
self.direct(url, resultlines)
def test_bad_file(self):
if os.name == 'nt':
if os.name == "nt":
# Fails on NT platforms and I am too lazy to fix
# Cause: url get quoted %7C which gets lowercased to
# %7c and this fails.

View file

@ -28,20 +28,20 @@ class TestFtp(FtpServerTest):
# ftp two slashes
url = "ftp://%s:%d/" % (self.host, self.port)
resultlines = [
"url %s" % url,
"cache key %s" % url,
"real url %s" % url,
"valid",
"url %s" % url,
"cache key %s" % url,
"real url %s" % url,
"valid",
]
# ftp use/password
user = "anonymous"
passwd = "Ftp"
url = "ftp://%s:%s@%s:%d/" % (user, passwd, self.host, self.port)
resultlines = [
"url %s" % url,
"cache key %s" % url,
"real url %s" % url,
"valid",
"url %s" % url,
"cache key %s" % url,
"real url %s" % url,
"valid",
]
self.direct(url, resultlines)
# ftp one slash
@ -68,7 +68,7 @@ class TestFtp(FtpServerTest):
url = "ftp://%s:%d/base" % (self.host, self.port)
nurl = self.norm(url)
resultlines = [
"url %s" % url,
"url %s" % url,
"cache key %s" % nurl,
"real url %s/" % nurl,
"warning Missing trailing directory slash in ftp url.",

View file

@ -20,10 +20,11 @@ Test http checking.
from tests import need_network
from .httpserver import HttpServerTest, CookieRedirectHttpRequestHandler
class TestHttp(HttpServerTest):
"""Test http:// link checking."""
def __init__(self, methodName='runTest'):
def __init__(self, methodName="runTest"):
super(TestHttp, self).__init__(methodName=methodName)
self.handler = CookieRedirectHttpRequestHandler
@ -58,7 +59,9 @@ class TestHttp(HttpServerTest):
resultlines.append("warning No Content")
if status == 429:
resultlines.append("warning Rate limited (Retry-After: None)")
if (status not in [101, 102] and status < 200) or (status >= 400 and status != 429):
if (status not in [101, 102] and status < 200) or (
status >= 400 and status != 429
):
result = "error"
else:
result = "valid"

View file

@ -19,6 +19,7 @@ Test http checking.
from .httpserver import HttpServerTest
from tests import need_network
class TestHttpMisc(HttpServerTest):
"""Test http:// misc link checking."""

View file

@ -19,10 +19,11 @@ Test http checking.
from tests import need_network
from .httpserver import HttpServerTest, CookieRedirectHttpRequestHandler
class TestHttpRedirect(HttpServerTest):
"""Test http:// link redirection checking."""
def __init__(self, methodName='runTest'):
def __init__(self, methodName="runTest"):
super(TestHttpRedirect, self).__init__(methodName=methodName)
self.handler = CookieRedirectHttpRequestHandler
@ -68,17 +69,17 @@ class TestHttpRedirect(HttpServerTest):
def redirect4(self):
url = "http://localhost:%d/redirect_newscheme_ftp" % self.port
nurl = url
#rurl = "ftp://example.com/"
# rurl = "ftp://example.com/"
resultlines = [
"url %s" % url,
"cache key %s" % nurl,
"real url %s" % nurl,
# don't allow ftp redirects
#"info Redirected to `%s'." % rurl,
#"valid",
#"url %s" % rurl,
#"cache key %s" % rurl,
#"real url %s" % rurl,
# "info Redirected to `%s'." % rurl,
# "valid",
# "url %s" % rurl,
# "cache key %s" % rurl,
# "real url %s" % rurl,
"error",
]
self.direct(url, resultlines, recursionlevel=99)
@ -86,21 +87,21 @@ class TestHttpRedirect(HttpServerTest):
def redirect5(self):
url = "http://localhost:%d/redirect_newscheme_file" % self.port
nurl = url
#rurl = "file:README"
#rnurl = "file:///README"
# rurl = "file:README"
# rnurl = "file:///README"
resultlines = [
"url %s" % url,
"cache key %s" % nurl,
"real url %s" % nurl,
# don't allow file redirects
#"info Redirected to `%s'." % rurl,
#"warning Redirection to url `%s' is not allowed." % rnurl,
# "info Redirected to `%s'." % rurl,
# "warning Redirection to url `%s' is not allowed." % rnurl,
"error",
]
self.direct(url, resultlines, recursionlevel=99)
def redirect6(self):
#max_redirect = 10
# max_redirect = 10
# url = "http://httpbin.org/redirect/" + max_redirect --> valid
# url = "http://httpbin.org/redirect/" + (max_redirect+1) --> error
pass # XXX
pass # XXX

View file

@ -18,6 +18,7 @@ Test http checking.
"""
from .httpserver import HttpServerTest
class TestHttpRobots(HttpServerTest):
"""Test robots.txt link checking behaviour."""

View file

@ -55,11 +55,7 @@ class TestHttpbin(LinkCheckTest):
password = "testpassword"
url = get_httpbin_url("/basic-auth/%s/%s" % (user, password))
nurl = self.norm(url)
entry = dict(
user=user,
password=password,
pattern=re.compile(r'.*'),
)
entry = dict(user=user, password=password, pattern=re.compile(r".*"),)
confargs = dict(authentication=[entry])
resultlines = [
"url %s" % url,

View file

@ -30,7 +30,7 @@ class TestHttps(HttpsServerTest):
Test https: link checking.
"""
def __init__(self, methodName='runTest'):
def __init__(self, methodName="runTest"):
super(TestHttps, self).__init__(methodName=methodName)
self.handler = CookieRedirectHttpRequestHandler
@ -45,7 +45,7 @@ class TestHttps(HttpsServerTest):
cert.set_notAfter(b"21190102030405Z")
cert.set_issuer(cert.get_subject())
cert.set_pubkey(key)
cert.sign(key, 'sha1')
cert.sign(key, "sha1")
with open(get_file("https_key.pem"), "wb") as f:
f.write(crypto.dump_privatekey(crypto.FILETYPE_PEM, key))
with open(get_file("https_cert.pem"), "wb") as f:
@ -59,13 +59,12 @@ class TestHttps(HttpsServerTest):
"real url %s" % url,
"valid",
]
confargs = dict(
sslverify=False
)
confargs = dict(sslverify=False)
self.direct(url, resultlines, recursionlevel=0, confargs=confargs)
def test_x509_to_dict(self):
with open(get_file("https_cert.pem"), "rb") as f:
cert = crypto.load_certificate(crypto.FILETYPE_PEM, f.read())
self.assertEqual(httputil.x509_to_dict(cert)["notAfter"],
"Jan 02 03:04:05 2119 GMT")
self.assertEqual(
httputil.x509_to_dict(cert)["notAfter"], "Jan 02 03:04:05 2119 GMT"
)

View file

@ -18,34 +18,34 @@ Test http checking.
"""
from .httpserver import HttpServerTest, CookieRedirectHttpRequestHandler
class TestHttpsRedirect(HttpServerTest):
"""Test https:// link redirection checking."""
def __init__(self, methodName='runTest'):
def __init__(self, methodName="runTest"):
super(TestHttpsRedirect, self).__init__(methodName=methodName)
self.handler = RedirectHttpsRequestHandler
def test_redirect(self):
url = "http://localhost:%d/redirect1" % self.port
nurl = url
#rurl = "https://localhost:%d/newurl1" % self.port
# rurl = "https://localhost:%d/newurl1" % self.port
resultlines = [
"url %s" % url,
"cache key %s" % nurl,
"real url %s" % url,
# XXX the redirect fails because this is not an SSL server
#"info Redirected to `%s'." % rurl.replace('http:', 'https:'),
#"valid",
#"url %s" % rurl,
#"cache key %s" % rurl,
#"real url %s" % rurl,
# "info Redirected to `%s'." % rurl.replace('http:', 'https:'),
# "valid",
# "url %s" % rurl,
# "cache key %s" % rurl,
# "real url %s" % rurl,
"error",
]
self.direct(url, resultlines, recursionlevel=0)
class RedirectHttpsRequestHandler(CookieRedirectHttpRequestHandler):
def redirect(self):
"""Redirect request."""
path = self.path.replace("redirect", "newurl")
@ -54,4 +54,3 @@ class RedirectHttpsRequestHandler(CookieRedirectHttpRequestHandler):
self.send_response(302)
self.send_header("Location", url)
self.end_headers()

View file

@ -31,4 +31,4 @@ class TestInternpat(LinkCheckTest):
url = "http://example.org/foo/"
url_data = get_url_from(url, 0, aggregate)
internpat = url_data.get_intern_pattern()
self.assertTrue(internpat.endswith('/'))
self.assertTrue(internpat.endswith("/"))

View file

@ -22,6 +22,7 @@ import re
from .httpserver import HttpServerTest, CGIHandler
from . import get_test_aggregate
class TestLoginUrl(HttpServerTest):
"""Test loginurl retrieval, search and posting credentials."""
@ -34,10 +35,13 @@ class TestLoginUrl(HttpServerTest):
confargs["loginurl"] = self.get_url(page)
if extrafields:
confargs["loginextrafields"] = {"extra_field": "default"}
confargs["authentication"] = [{
"user": user, "password": password,
"pattern": re.compile("^http://localhost.*")
}]
confargs["authentication"] = [
{
"user": user,
"password": password,
"pattern": re.compile("^http://localhost.*"),
}
]
aggregate = get_test_aggregate(confargs, {"expected": ""})
aggregate.visit_loginurl()
@ -45,8 +49,9 @@ class TestLoginUrl(HttpServerTest):
return aggregate.cookies
def test_loginurl(self):
cookies = self.visit_loginurl("loginform.html", "test_user",
"test_password", True)
cookies = self.visit_loginurl(
"loginform.html", "test_user", "test_password", True
)
self.assertEqual(cookies["login"], "test_user")
self.assertEqual(cookies["password"], "test_password")
@ -58,7 +63,8 @@ class TestLoginUrl(HttpServerTest):
self.assertEqual(cookies["login"], "test_user")
def test_login_password(self):
cookies = self.visit_loginurl("loginform_password.html",
password="test_password")
cookies = self.visit_loginurl(
"loginform_password.html", password="test_password"
)
self.assertEqual(cookies["password"], "test_password")

View file

@ -27,22 +27,24 @@ class TestMailBad(MailTest):
self.mail_error("mailto:@")
self.mail_error("mailto:@example.org")
self.mail_error("mailto:a@")
self.mail_error("mailto:%s@example.org" % ("a"*65))
self.mail_error('mailto:a@%s.com' % ("a"*64))
self.mail_error("mailto:%s@example.org" % ("a" * 65))
self.mail_error("mailto:a@%s.com" % ("a" * 64))
# local part quoted
self.mail_error('mailto:"a""@example.com', cache_key='mailto:a')
self.mail_error('mailto:"a""@example.com', cache_key="mailto:a")
self.mail_error('mailto:""a"@example.com', cache_key='mailto:""a"@example.com')
self.mail_error('mailto:"a\\"@example.com', cache_key='mailto:a"@example.com')
# local part unquoted
self.mail_error('mailto:.a@example.com')
self.mail_error('mailto:a.@example.com')
self.mail_error('mailto:a..b@example.com')
self.mail_error("mailto:.a@example.com")
self.mail_error("mailto:a.@example.com")
self.mail_error("mailto:a..b@example.com")
# domain part
self.mail_error('mailto:a@a_b.com')
self.mail_error('mailto:a@example.com.')
self.mail_error('mailto:a@example.com.111')
self.mail_error('mailto:a@example..com')
self.mail_error("mailto:a@a_b.com")
self.mail_error("mailto:a@example.com.")
self.mail_error("mailto:a@example.com.111")
self.mail_error("mailto:a@example..com")
# other
# ? extension forbidden in <> construct
self.mail_error("mailto:Bastian Kleineidam <calvin@users.sourceforge.net?foo=bar>",
cache_key="mailto:calvin@users.sourceforge.net?foo=bar")
self.mail_error(
"mailto:Bastian Kleineidam <calvin@users.sourceforge.net?foo=bar>",
cache_key="mailto:calvin@users.sourceforge.net?foo=bar",
)

View file

@ -28,22 +28,26 @@ class TestMailGood(MailTest):
@need_network
def test_good_mail(self):
# some good mailto addrs
url = self.norm("mailto:Dude <calvin@users.sourceforge.net> , "\
"Killer <calvin@users.sourceforge.net>?subject=bla")
url = self.norm(
"mailto:Dude <calvin@users.sourceforge.net> , "
"Killer <calvin@users.sourceforge.net>?subject=bla"
)
resultlines = [
"url %s" % url,
"cache key mailto:calvin@users.sourceforge.net",
"real url %s" % url,
"valid",
"url %s" % url,
"cache key mailto:calvin@users.sourceforge.net",
"real url %s" % url,
"valid",
]
self.direct(url, resultlines)
url = self.norm("mailto:Bastian Kleineidam <calvin@users.sourceforge.net>?"\
"bcc=calvin%40users.sourceforge.net")
url = self.norm(
"mailto:Bastian Kleineidam <calvin@users.sourceforge.net>?"
"bcc=calvin%40users.sourceforge.net"
)
resultlines = [
"url %s" % url,
"cache key mailto:calvin@users.sourceforge.net",
"real url %s" % url,
"valid",
"url %s" % url,
"cache key mailto:calvin@users.sourceforge.net",
"real url %s" % url,
"valid",
]
self.direct(url, resultlines)
url = self.norm("mailto:Bastian Kleineidam <calvin@users.sourceforge.net>")
@ -62,19 +66,23 @@ class TestMailGood(MailTest):
"valid",
]
self.direct(url, resultlines)
url = self.norm("mailto:?to=calvin@users.sourceforge.net&subject=blubb&"
"cc=calvin_cc@users.sourceforge.net&CC=calvin_CC@users.sourceforge.net")
url = self.norm(
"mailto:?to=calvin@users.sourceforge.net&subject=blubb&"
"cc=calvin_cc@users.sourceforge.net&CC=calvin_CC@users.sourceforge.net"
)
resultlines = [
"url %s" % url,
"cache key mailto:calvin@users.sourceforge.net,"
"calvin_CC@users.sourceforge.net,calvin_cc@users.sourceforge.net",
"calvin_CC@users.sourceforge.net,calvin_cc@users.sourceforge.net",
"real url %s" % url,
"valid",
]
self.direct(url, resultlines)
url = self.norm("mailto:news-admins@freshcode.club?subject="
"Re:%20[fm%20#11093]%20(news-admins)%20Submission%20"
"report%20-%20Pretty%20CoLoRs")
url = self.norm(
"mailto:news-admins@freshcode.club?subject="
"Re:%20[fm%20#11093]%20(news-admins)%20Submission%20"
"report%20-%20Pretty%20CoLoRs"
)
resultlines = [
"url %s" % url,
"cache key mailto:news-admins@freshcode.club",
@ -117,10 +125,9 @@ class TestMailGood(MailTest):
def _mail_valid_unverified(self, char):
# valid mail addresses
addr = 'abc%sdef@sourceforge.net' % char
addr = "abc%sdef@sourceforge.net" % char
url = "mailto:%s" % addr
self.mail_valid(url,
cache_key=url)
self.mail_valid(url, cache_key=url)
@need_network
def test_valid_mail1(self):

View file

@ -25,8 +25,9 @@ from . import LinkCheckTest
# to enable the has_newsserver() resource manually.
NNTP_SERVER = "news.uni-stuttgart.de"
# info string returned by news server
NNTP_INFO = "200 news.uni-stuttgart.de InterNetNews NNRP server " \
"INN 2.5.2 ready (no posting)"
NNTP_INFO = (
"200 news.uni-stuttgart.de InterNetNews NNRP server " "INN 2.5.2 ready (no posting)"
)
# Most free NNTP servers are slow, so don't waste a lot of time running those.
NNTP_TIMEOUT_SECS = 30

View file

@ -20,6 +20,7 @@ from test.support import EnvironmentVarGuard
from . import httpserver
class TestProxy(httpserver.HttpServerTest):
"""Test no_proxy env var handling."""

View file

@ -28,7 +28,7 @@ class TestURLLength(LinkCheckTest):
def test_url_warn(self):
url = "http://www.example.org/" + ("a" * URL_MAX_LENGTH)
attrs = self.get_attrs(url=url)
attrs['nurl'] = self.norm(url)
attrs["nurl"] = self.norm(url)
resultlines = [
"url %(nurl)s" % attrs,
"cache key %(nurl)s" % attrs,

View file

@ -30,7 +30,7 @@ class TestWhitespace(LinkCheckTest):
# Leading whitespace
url = " http://www.example.org/"
attrs = self.get_attrs(url=url)
attrs['surl'] = url.strip()
attrs["surl"] = url.strip()
resultlines = [
"url %(surl)s" % attrs,
"cache key %(surl)s" % attrs,
@ -41,7 +41,7 @@ class TestWhitespace(LinkCheckTest):
self.direct(url, resultlines)
url = "\nhttp://www.example.org/"
attrs = self.get_attrs(url=url)
attrs['surl'] = url.strip()
attrs["surl"] = url.strip()
resultlines = [
"url %(surl)s" % attrs,
"cache key %(surl)s" % attrs,

View file

@ -80,13 +80,24 @@ class TestConfig(unittest.TestCase):
# output section
self.assertTrue(linkcheck.log.is_debug(linkcheck.LOG_THREAD))
self.assertFalse(config["status"])
self.assertTrue(isinstance(config["logger"], linkcheck.logger.customxml.CustomXMLLogger))
self.assertTrue(
isinstance(config["logger"], linkcheck.logger.customxml.CustomXMLLogger)
)
self.assertTrue(config["verbose"])
self.assertTrue(config["warnings"])
self.assertFalse(config["quiet"])
self.assertEqual(len(config["fileoutput"]), 8)
# plugins
for plugin in ("AnchorCheck", "CssSyntaxCheck", "HtmlSyntaxCheck", "LocationInfo", "RegexCheck", "SslCertificateCheck", "VirusCheck", "HttpHeaderInfo"):
for plugin in (
"AnchorCheck",
"CssSyntaxCheck",
"HtmlSyntaxCheck",
"LocationInfo",
"RegexCheck",
"SslCertificateCheck",
"VirusCheck",
"HttpHeaderInfo",
):
self.assertTrue(plugin in config["enabledplugins"])
# text logger section
self.assertEqual(config["text"]["filename"], "imadoofus.txt")

View file

@ -57,7 +57,7 @@ def quote_attrval(s):
for c in s:
if ord(c) <= 127:
# ASCII
if c == '&':
if c == "&":
res.append("&amp;")
elif c == '"':
res.append("&quot;")

View file

@ -20,7 +20,6 @@ from linkcheck.logger.csvlog import CSVLogger
class TestCsvLogger(unittest.TestCase):
def test_parts(self):
args = dict(
filename=os.path.join(os.path.dirname(__file__), "testlog.csv"),

View file

@ -23,6 +23,7 @@ from wsgiref.util import setup_testing_defaults
from linkcheck.lc_cgi import checkform, checklink, LCFormError, application
from linkcheck.strformat import limit
class TestWsgi(unittest.TestCase):
"""Test wsgi application."""
@ -57,19 +58,21 @@ class TestWsgi(unittest.TestCase):
def test_application(self):
form = dict(url="http://www.example.com/", level="0")
formdata = urllib.parse.urlencode(form)
formdata = formdata.encode('ascii')
environ = {'wsgi.input': BytesIO(formdata)}
formdata = formdata.encode("ascii")
environ = {"wsgi.input": BytesIO(formdata)}
setup_testing_defaults(environ)
test_response = b""
test_headers = [None]
test_status = [None]
def start_response(status, headers):
test_status[0] = status
test_headers[0] = headers
for str_data in application(environ, start_response):
if not isinstance(str_data, bytes):
err = "answer is not a byte string: %r" % limit(str_data, 30)
self.assertTrue(False, err)
test_response += str_data
self.assertEqual(test_status[0], '200 OK')
self.assertEqual(test_status[0], "200 OK")
self.assertTrue(b"Generated by LinkChecker" in test_response)

View file

@ -22,7 +22,6 @@ from linkcheck.plugins import viruscheck as clamav
class TestClamav(unittest.TestCase):
def setUp(self):
self.clamav_conf = clamav.get_clamav_conf("/etc/clamav/clamd.conf")
@ -37,25 +36,27 @@ class TestClamav(unittest.TestCase):
def testInfected(self):
# from the clamav test directory: the clamav test file as html data
data = (
b'<a href="data:application/octet-stream;base64,'
b'TVpQAAIAAAAEAA8A//8AALgAAAAhAAAAQAAaAAAAAAAAAAAAAAAAAAAAAAAAAA'
b'AAAAAAAAAAAAAAAAAAAAEAALtxEEAAM8BQUIvzU1NQsClAMARmrHn5ujEAeA2t'
b'UP9mcA4fvjEA6eX/tAnNIbRMzSFiDAoBAnB2FwIeTgwEL9rMEAAAAAAAAAAAAA'
b'AAAAAAwBAAAIAQAAAAAAAAAAAAAAAAAADaEAAA9BAAAAAAAAAAAAAAAAAAAAAA'
b'AAAAAAAAS0VSTkVMMzIuRExMAABFeGl0UHJvY2VzcwBVU0VSMzIuRExMAENMQU'
b'1lc3NhZ2VCb3hBAOYQAAAAAAAAPz8/P1BFAABMAQEAYUNhQgAAAAAAAAAA4ACO'
b'gQsBAhkABAAAAAYAAAAAAABAEAAAABAAAEAAAAAAAEAAABAAAAACAAABAAAAAA'
b'AAAAMACgAAAAAAACAAAAAEAAAAAAAAAgAAAAAAEAAAIAAAAAAQAAAQAAAAAAAA'
b'EAAAAAAAAAAAAAAAhBAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
b'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA'
b'AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAW0NMQU1BVl'
b'0AEAAAABAAAAACAAABAAAAAAAAAAAAAAAAAAAAAAAAwA==">t</a>'
b'<a href="data:application/octet-stream;base64,'
b"TVpQAAIAAAAEAA8A//8AALgAAAAhAAAAQAAaAAAAAAAAAAAAAAAAAAAAAAAAAA"
b"AAAAAAAAAAAAAAAAAAAAEAALtxEEAAM8BQUIvzU1NQsClAMARmrHn5ujEAeA2t"
b"UP9mcA4fvjEA6eX/tAnNIbRMzSFiDAoBAnB2FwIeTgwEL9rMEAAAAAAAAAAAAA"
b"AAAAAAwBAAAIAQAAAAAAAAAAAAAAAAAADaEAAA9BAAAAAAAAAAAAAAAAAAAAAA"
b"AAAAAAAAS0VSTkVMMzIuRExMAABFeGl0UHJvY2VzcwBVU0VSMzIuRExMAENMQU"
b"1lc3NhZ2VCb3hBAOYQAAAAAAAAPz8/P1BFAABMAQEAYUNhQgAAAAAAAAAA4ACO"
b"gQsBAhkABAAAAAYAAAAAAABAEAAAABAAAEAAAAAAAEAAABAAAAACAAABAAAAAA"
b"AAAAMACgAAAAAAACAAAAAEAAAAAAAAAgAAAAAAEAAAIAAAAAAQAAAQAAAAAAAA"
b"EAAAAAAAAAAAAAAAhBAAAIAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA"
b"AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAW0NMQU1BVl"
b'0AEAAAABAAAAACAAABAAAAAAAAAAAAAAAAAAAAAAAAwA==">t</a>'
)
infected, errors = clamav.scan(data, self.clamav_conf)
# different versions of clamav report different responses, apparently
acceptable_responses = (
['stream: ClamAV-Test-File(2d1206194bd704385e37000be6113f73:781) FOUND\n'],
['stream: Clamav.Test.File-6(aa15bcf478d165efd2065190eb473bcb:544) FOUND\n'],
["stream: ClamAV-Test-File(2d1206194bd704385e37000be6113f73:781) FOUND\n"],
[
"stream: Clamav.Test.File-6(aa15bcf478d165efd2065190eb473bcb:544) FOUND\n"
],
)
self.assertIn(infected, acceptable_responses)
self.assertFalse(errors)

View file

@ -30,8 +30,8 @@ class TestCookies(unittest.TestCase):
def test_cookie_parse_multiple_headers(self):
lines = [
'Host: example.org',
'Path: /hello',
"Host: example.org",
"Path: /hello",
'Set-cookie: ID="smee"',
'Set-cookie: spam="egg"',
]
@ -41,14 +41,14 @@ class TestCookies(unittest.TestCase):
for cookie in cookies:
self.assertEqual(cookie.domain, "example.org")
self.assertEqual(cookie.path, "/hello")
self.assertEqual(cookies[0].name, 'ID')
self.assertEqual(cookies[0].value, 'smee')
self.assertEqual(cookies[1].name, 'spam')
self.assertEqual(cookies[1].value, 'egg')
self.assertEqual(cookies[0].name, "ID")
self.assertEqual(cookies[0].value, "smee")
self.assertEqual(cookies[1].name, "spam")
self.assertEqual(cookies[1].value, "egg")
def test_cookie_parse_multiple_values(self):
lines = [
'Host: example.org',
"Host: example.org",
'Set-cookie: baggage="elitist"; comment="hologram"',
]
from_headers = linkcheck.cookies.from_headers
@ -57,14 +57,14 @@ class TestCookies(unittest.TestCase):
for cookie in cookies:
self.assertEqual(cookie.domain, "example.org")
self.assertEqual(cookie.path, "/")
self.assertEqual(cookies[0].name, 'baggage')
self.assertEqual(cookies[0].value, 'elitist')
self.assertEqual(cookies[1].name, 'comment')
self.assertEqual(cookies[1].value, 'hologram')
self.assertEqual(cookies[0].name, "baggage")
self.assertEqual(cookies[0].value, "elitist")
self.assertEqual(cookies[1].name, "comment")
self.assertEqual(cookies[1].value, "hologram")
def test_cookie_parse_error(self):
lines = [
' Host: imaweevil.org',
" Host: imaweevil.org",
'Set-cookie: baggage="elitist"; comment="hologram"',
]
from_headers = linkcheck.cookies.from_headers
@ -74,9 +74,8 @@ class TestCookies(unittest.TestCase):
# Regression test for https://github.com/linkchecker/linkchecker/issues/62
config = linkcheck.configuration.Configuration()
here = os.path.dirname(__file__)
config['cookiefile'] = os.path.join(here, 'cookies.txt')
config["cookiefile"] = os.path.join(here, "cookies.txt")
aggregate = linkcheck.director.get_aggregate(config)
aggregate.add_request_session()
session = aggregate.get_request_session()
self.assertEqual({c.name for c in session.cookies},
{'om', 'multiple', 'are'})
self.assertEqual({c.name for c in session.cookies}, {"om", "multiple", "are"})

View file

@ -33,13 +33,16 @@ class TestDecorators(unittest.TestCase):
@linkcheck.decorators.timed()
def f():
return 42
self.assertEqual(f(), 42)
def test_timeit2(self):
log = StringIO()
@linkcheck.decorators.timed(log=log, limit=0)
def f():
time.sleep(1)
return 42
self.assertEqual(f(), 42)
self.assertEqual(log.getvalue(), 'f took 1.00 seconds\n()\n{}\n')
self.assertEqual(log.getvalue(), "f took 1.00 seconds\n()\n{}\n")

View file

@ -33,11 +33,11 @@ class TestFilenames(unittest.TestCase):
path = os.getcwd()
realpath = get_nt_filename(path)
self.assertEqual(path, realpath)
path = 'c:\\'
path = "c:\\"
realpath = get_nt_filename(path)
self.assertEqual(path, realpath)
# XXX Only works on my computer.
# Is there a Windows UNC share that is always available for tests?
#path = '\\Vboxsrv\share\msg.txt'
#realpath = get_nt_filename(path)
#self.assertEqual(path, realpath)
# path = '\\Vboxsrv\share\msg.txt'
# realpath = get_nt_filename(path)
# self.assertEqual(path, realpath)

View file

@ -23,57 +23,95 @@ from linkcheck.ftpparse import ftpparse
patterns = (
# EPLF format
# http://pobox.com/~djb/proto/eplf.html
("+i8388621.29609,m824255902,/,\tdev",
dict(name='dev', tryretr=False, trycwd=True)),
("+i8388621.44468,m839956783,r,s10376,\tRFCEPLF",
dict(name='RFCEPLF', tryretr=True, trycwd=False)),
(
"+i8388621.29609,m824255902,/,\tdev",
dict(name="dev", tryretr=False, trycwd=True),
),
(
"+i8388621.44468,m839956783,r,s10376,\tRFCEPLF",
dict(name="RFCEPLF", tryretr=True, trycwd=False),
),
# UNIX-style listing, without inum and without blocks
("-rw-r--r-- 1 root other 531 Jan 29 03:26 README",
dict(name='README', tryretr=True, trycwd=False)),
("dr-xr-xr-x 2 root other 512 Apr 8 1994 etc",
dict(name='etc', tryretr=False, trycwd=True)),
("dr-xr-xr-x 2 root 512 Apr 8 1994 etc",
dict(name='etc', tryretr=False, trycwd=True)),
("lrwxrwxrwx 1 root other 7 Jan 25 00:17 bin -> usr/bin",
dict(name='usr/bin', tryretr=True, trycwd=True)),
(
"-rw-r--r-- 1 root other 531 Jan 29 03:26 README",
dict(name="README", tryretr=True, trycwd=False),
),
(
"dr-xr-xr-x 2 root other 512 Apr 8 1994 etc",
dict(name="etc", tryretr=False, trycwd=True),
),
(
"dr-xr-xr-x 2 root 512 Apr 8 1994 etc",
dict(name="etc", tryretr=False, trycwd=True),
),
(
"lrwxrwxrwx 1 root other 7 Jan 25 00:17 bin -> usr/bin",
dict(name="usr/bin", tryretr=True, trycwd=True),
),
# Also produced by Microsoft's FTP servers for Windows:
("---------- 1 owner group 1803128 Jul 10 10:18 ls-lR.Z",
dict(name='ls-lR.Z', tryretr=True, trycwd=False)),
("d--------- 1 owner group 0 May 9 19:45 Softlib",
dict(name='Softlib', tryretr=False, trycwd=True)),
(
"---------- 1 owner group 1803128 Jul 10 10:18 ls-lR.Z",
dict(name="ls-lR.Z", tryretr=True, trycwd=False),
),
(
"d--------- 1 owner group 0 May 9 19:45 Softlib",
dict(name="Softlib", tryretr=False, trycwd=True),
),
# Also WFTPD for MSDOS:
("-rwxrwxrwx 1 noone nogroup 322 Aug 19 1996 message.ftp",
dict(name='message.ftp', tryretr=True, trycwd=False)),
(
"-rwxrwxrwx 1 noone nogroup 322 Aug 19 1996 message.ftp",
dict(name="message.ftp", tryretr=True, trycwd=False),
),
# Also NetWare:
("d [R----F--] supervisor 512 Jan 16 18:53 login",
dict(name='login', tryretr=False, trycwd=True)),
("- [R----F--] rhesus 214059 Oct 20 15:27 cx.exe",
dict(name='cx.exe', tryretr=True, trycwd=False)),
(
"d [R----F--] supervisor 512 Jan 16 18:53 login",
dict(name="login", tryretr=False, trycwd=True),
),
(
"- [R----F--] rhesus 214059 Oct 20 15:27 cx.exe",
dict(name="cx.exe", tryretr=True, trycwd=False),
),
# Also NetPresenz for the Mac:
("-------r-- 326 1391972 1392298 Nov 22 1995 MegaPhone.sit",
dict(name='MegaPhone.sit', tryretr=True, trycwd=False)),
("drwxrwxr-x folder 2 May 10 1996 network",
dict(name='network', tryretr=False, trycwd=True)),
(
"-------r-- 326 1391972 1392298 Nov 22 1995 MegaPhone.sit",
dict(name="MegaPhone.sit", tryretr=True, trycwd=False),
),
(
"drwxrwxr-x folder 2 May 10 1996 network",
dict(name="network", tryretr=False, trycwd=True),
),
# MultiNet (some spaces removed from examples)
("00README.TXT;1 2 30-DEC-1996 17:44 [SYSTEM] (RWED,RWED,RE,RE)",
dict(name='00README.TXT', tryretr=True, trycwd=False)),
("CORE.DIR;1 1 8-SEP-1996 16:09 [SYSTEM] (RWE,RWE,RE,RE)",
dict(name='CORE', tryretr=False, trycwd=True)),
(
"00README.TXT;1 2 30-DEC-1996 17:44 [SYSTEM] (RWED,RWED,RE,RE)",
dict(name="00README.TXT", tryretr=True, trycwd=False),
),
(
"CORE.DIR;1 1 8-SEP-1996 16:09 [SYSTEM] (RWE,RWE,RE,RE)",
dict(name="CORE", tryretr=False, trycwd=True),
),
# and non-MultiNet VMS:
("CII-MANUAL.TEX;1 213/216 29-JAN-1996 03:33:12 [ANONYMOU,ANONYMOUS] (RWED,RWED,,)",
dict(name='CII-MANUAL.TEX', tryretr=True, trycwd=False)),
(
"CII-MANUAL.TEX;1 213/216 29-JAN-1996 03:33:12 [ANONYMOU,ANONYMOUS] (RWED,RWED,,)",
dict(name="CII-MANUAL.TEX", tryretr=True, trycwd=False),
),
# MSDOS format
("04-27-00 09:09PM <DIR> licensed",
dict(name='licensed', tryretr=False, trycwd=True)),
("07-18-00 10:16AM <DIR> pub",
dict(name='pub', tryretr=False, trycwd=True)),
("04-14-00 03:47PM 589 readme.htm",
dict(name='readme.htm', tryretr=True, trycwd=False)),
(
"04-27-00 09:09PM <DIR> licensed",
dict(name="licensed", tryretr=False, trycwd=True),
),
(
"07-18-00 10:16AM <DIR> pub",
dict(name="pub", tryretr=False, trycwd=True),
),
(
"04-14-00 03:47PM 589 readme.htm",
dict(name="readme.htm", tryretr=True, trycwd=False),
),
# Some useless lines, safely ignored:
("Total of 11 Files, 10966 Blocks.", None), # (VMS)
("total 14786", None), # (UNIX)
("DISK$ANONFTP:[ANONYMOUS]", None), # (VMS)
("Directory DISK$PCSA:[ANONYM]", None), # (VMS)
("Total of 11 Files, 10966 Blocks.", None), # (VMS)
("total 14786", None), # (UNIX)
("DISK$ANONFTP:[ANONYMOUS]", None), # (VMS)
("Directory DISK$PCSA:[ANONYM]", None), # (VMS)
("", None),
)
@ -86,5 +124,6 @@ class TestFtpparse(unittest.TestCase):
def test_ftpparse(self):
for line, expected in patterns:
res = ftpparse(line)
self.assertEqual(expected, res,
"got %r\nexpected %r\n%r" % (res, expected, line))
self.assertEqual(
expected, res, "got %r\nexpected %r\n%r" % (res, expected, line)
)

View file

@ -30,4 +30,4 @@ class TestLinkchecker(unittest.TestCase):
for option in ("-V", "--version", "-h", "--help", "--list-plugins", "-Dall"):
run_with_options([option])
# unknown option
self.assertRaises(OSError, run_with_options, ['--imadoofus'])
self.assertRaises(OSError, run_with_options, ["--imadoofus"])

View file

@ -28,22 +28,25 @@ class TestLinkparser(unittest.TestCase):
def _test_one_link(self, content, url):
self.count_url = 0
linkparse.find_links(htmlsoup.make_soup(content),
self._test_one_url(url), linkparse.LinkTags)
linkparse.find_links(
htmlsoup.make_soup(content), self._test_one_url(url), linkparse.LinkTags
)
self.assertEqual(self.count_url, 1)
def _test_one_url(self, origurl):
"""Return parser callback function."""
def callback(url, line, column, name, base):
self.count_url += 1
self.assertEqual(origurl, url)
return callback
def _test_no_link(self, content):
def callback(url, line, column, name, base):
self.assertTrue(False, 'URL %r found' % url)
linkparse.find_links(htmlsoup.make_soup(content), callback,
linkparse.LinkTags)
self.assertTrue(False, "URL %r found" % url)
linkparse.find_links(htmlsoup.make_soup(content), callback, linkparse.LinkTags)
def test_href_parsing(self):
# Test <a href> parsing.
@ -87,11 +90,11 @@ class TestLinkparser(unittest.TestCase):
self._test_one_link(content % url, url)
content = '<table style="background: url( %s ) no-repeat" >'
self._test_one_link(content % url, url)
content = '<table style="background: url(\'%s\') no-repeat" >'
content = "<table style=\"background: url('%s') no-repeat\" >"
self._test_one_link(content % url, url)
content = "<table style='background: url(\"%s\") no-repeat' >"
self._test_one_link(content % url, url)
content = '<table style="background: url(\'%s\' ) no-repeat" >'
content = "<table style=\"background: url('%s' ) no-repeat\" >"
self._test_one_link(content % url, url)
content = "<table style='background: url( \"%s\") no-repeat' >"
self._test_one_link(content % url, url)
@ -108,6 +111,6 @@ class TestLinkparser(unittest.TestCase):
self.assertEqual(strip(content), "abc")
def test_url_quoting(self):
url = 'http://example.com/bla/a=b'
url = "http://example.com/bla/a=b"
content = '<a href="%s&quot;">'
self._test_one_link(content % url, url + '"')

View file

@ -38,18 +38,17 @@ login_form = """
</html>
"""
class TestFormSearch(unittest.TestCase):
"""Test processing of a login form."""
def test_search_form(self):
form = loginformsearch.search_form(login_form,
"User_Field", "Password_Field")
form = loginformsearch.search_form(login_form, "User_Field", "Password_Field")
self.assertIsNotNone(form)
self.assertEqual(form.url, "/log_me_in")
self.assertIn("User_Field", form.data)
self.assertIn("Password_Field", form.data)
def test_search_form_none(self):
form = loginformsearch.search_form(login_form,
"user_field", "password_field")
form = loginformsearch.search_form(login_form, "user_field", "password_field")
self.assertIsNone(form)

View file

@ -22,6 +22,7 @@ import os
from . import get_file
import linkcheck.mimeutil
class TestMiMeutil(unittest.TestCase):
"""Test file utility functions."""

View file

@ -82,12 +82,16 @@ parsetests = [
("""<a href='"' >""", """<a href="&quot;"/>"""),
("""<a href="bla" %]" >""", """<a %]"="" href="bla"/>"""),
("""<a href=bla" >""", """<a href="bla&quot;"/>"""),
("""<a onmouseover=blubb('nav1','',"""\
"""'/images/nav.gif',1);move(this); b="c">""",
"""<a b="c" onmouseover="blubb('nav1','',"""\
"""'/images/nav.gif',1);move(this);"/>"""),
("""<a onClick=location.href('/index.htm') b="c">""",
"""<a b="c" onclick="location.href('/index.htm')"/>"""),
(
"""<a onmouseover=blubb('nav1','',"""
"""'/images/nav.gif',1);move(this); b="c">""",
"""<a b="c" onmouseover="blubb('nav1','',"""
"""'/images/nav.gif',1);move(this);"/>""",
),
(
"""<a onClick=location.href('/index.htm') b="c">""",
"""<a b="c" onclick="location.href('/index.htm')"/>""",
),
# entity resolving
("""<a href="&#6D;ailto:" >""", """<a href="D;ailto:"/>"""),
("""<a href="&amp;ailto:" >""", """<a href="&amp;ailto:"/>"""),
@ -98,19 +102,31 @@ parsetests = [
# note that \u8156 is not valid encoding and therefore gets removed
("""<a href="&#8156;ailto:" >""", """<a href="&#8156;ailto:"/>"""),
# mailto link
("""<a href=mailto:calvin@LocalHost?subject=Hallo&to=michi>1</a>""",
"""<a href="mailto:calvin@LocalHost?subject=Hallo&amp;to=michi">1</a>"""),
(
"""<a href=mailto:calvin@LocalHost?subject=Hallo&to=michi>1</a>""",
"""<a href="mailto:calvin@LocalHost?subject=Hallo&amp;to=michi">1</a>""",
),
# meta tag with charset encoding
("""<meta http-equiv="content-type" content>""",
"""<meta content="" http-equiv="content-type"/>"""),
("""<meta http-equiv="content-type" content=>""",
"""<meta content="" http-equiv="content-type"/>"""),
("""<meta http-equiv="content-type" content="hulla">""",
"""<meta content="hulla" http-equiv="content-type"/>"""),
("""<meta http-equiv="content-type" content="text/html; charset=iso8859-1">""",
"""<meta content="text/html; charset=iso8859-1" http-equiv="content-type"/>"""),
("""<meta http-equiv="content-type" content="text/html; charset=hulla">""",
"""<meta content="text/html; charset=hulla" http-equiv="content-type"/>"""),
(
"""<meta http-equiv="content-type" content>""",
"""<meta content="" http-equiv="content-type"/>""",
),
(
"""<meta http-equiv="content-type" content=>""",
"""<meta content="" http-equiv="content-type"/>""",
),
(
"""<meta http-equiv="content-type" content="hulla">""",
"""<meta content="hulla" http-equiv="content-type"/>""",
),
(
"""<meta http-equiv="content-type" content="text/html; charset=iso8859-1">""",
"""<meta content="text/html; charset=iso8859-1" http-equiv="content-type"/>""",
),
(
"""<meta http-equiv="content-type" content="text/html; charset=hulla">""",
"""<meta content="text/html; charset=hulla" http-equiv="content-type"/>""",
),
# missing > in end tag
("""</td <td a="b" >""", """"""),
("""</td<td a="b" >""", """"""),
@ -149,8 +165,7 @@ class TestParser(unittest.TestCase):
Check parse results.
"""
res = out.getvalue()
msg = "Test error; in: %r, out: %r, expect: %r" % \
(_in, res, _out)
msg = "Test error; in: %r, out: %r, expect: %r" % (_in, res, _out)
self.assertEqual(res, _out, msg=msg)
def test_encoding_detection_utf_content(self):
@ -162,7 +177,9 @@ class TestParser(unittest.TestCase):
self.encoding_test(html, "utf-8")
def test_encoding_detection_iso_content(self):
html = b'<meta http-equiv="content-type" content="text/html; charset=ISO8859-1">'
html = (
b'<meta http-equiv="content-type" content="text/html; charset=ISO8859-1">'
)
self.encoding_test(html, "iso8859-1")
def test_encoding_detection_iso_charset(self):

View file

@ -26,6 +26,7 @@ from tests import need_msgfmt, need_posix
pofiles = None
def get_pofiles():
"""Find all .po files in this source."""
global pofiles
@ -55,7 +56,7 @@ class TestGTranslator(unittest.TestCase):
def test_gtranslator(self):
"""Test all pofiles for GTranslator brokenness."""
for f in get_pofiles():
with open(f, 'rb') as fd:
with open(f, "rb") as fd:
self.check_file(fd, f)
def check_file(self, fd, f):
@ -63,5 +64,7 @@ class TestGTranslator(unittest.TestCase):
for line in fd:
if line.strip().startswith(b"#"):
continue
self.assertFalse(b"\xc2\xb7" in line,
"Broken GTranslator copy/paste in %r:\n%r" % (f, line))
self.assertFalse(
b"\xc2\xb7" in line,
"Broken GTranslator copy/paste in %r:\n%r" % (f, line),
)

View file

@ -43,14 +43,18 @@ class TestRobotParser(unittest.TestCase):
@need_network
def test_nonexisting_robots(self):
# robots.txt that does not exist
self.rp.set_url('http://www.lycos.com/robots.txt')
self.rp.set_url("http://www.lycos.com/robots.txt")
self.rp.read()
self.check(self.rp.can_fetch(configuration.UserAgent,
'http://www.lycos.com/search'), True)
self.check(
self.rp.can_fetch(configuration.UserAgent, "http://www.lycos.com/search"),
True,
)
@need_network
def test_disallowed_robots(self):
self.rp.set_url('http://google.com/robots.txt')
self.rp.set_url("http://google.com/robots.txt")
self.rp.read()
self.check(self.rp.can_fetch(configuration.UserAgent,
"http://google.com/search"), False)
self.check(
self.rp.can_fetch(configuration.UserAgent, "http://google.com/search"),
False,
)

View file

@ -166,8 +166,8 @@ class TestRobotsTxt(unittest.TestCase):
]
self.rp.parse(lines)
self.assertEqual(str(self.rp), "\n".join(lines2))
good = ['/', '/test.html']
bad = ['/cyberworld/map/index.html', '/tmp/xxx', '/foo.html']
good = ["/", "/test.html"]
bad = ["/cyberworld/map/index.html", "/tmp/xxx", "/foo.html"]
self.check_urls(good, bad)
def test_access2(self):
@ -191,8 +191,8 @@ class TestRobotsTxt(unittest.TestCase):
]
self.rp.parse(lines)
self.assertEqual(str(self.rp), "\n".join(lines2))
good = ['/', '/test.html', ('cybermapper', '/cyberworld/map/index.html')]
bad = ['/cyberworld/map/index.html']
good = ["/", "/test.html", ("cybermapper", "/cyberworld/map/index.html")]
bad = ["/cyberworld/map/index.html"]
self.check_urls(good, bad)
def test_access3(self):
@ -208,7 +208,7 @@ class TestRobotsTxt(unittest.TestCase):
self.rp.parse(lines)
self.assertEqual(str(self.rp), "\n".join(lines2))
good = []
bad = ['/cyberworld/map/index.html', '/', '/tmp/']
bad = ["/cyberworld/map/index.html", "/", "/tmp/"]
self.check_urls(good, bad)
def test_access4(self):
@ -224,18 +224,25 @@ class TestRobotsTxt(unittest.TestCase):
"Disallow: /tmp",
"Disallow: /a%3Cd.html",
"Disallow: /a/b.html",
"Disallow: /%7Ejoe/index.html" if sys.version_info < (3, 7) \
"Disallow: /%7Ejoe/index.html"
if sys.version_info < (3, 7)
else "Disallow: /~joe/index.html",
]
self.rp.parse(lines)
self.assertEqual(str(self.rp), "\n".join(lines2))
good = []
bad = ['/tmp', '/tmp.html', '/tmp/a.html',
'/a%3cd.html', '/a%3Cd.html', '/a%2fb.html',
'/~joe/index.html', '/a/b.html',
bad = [
"/tmp",
"/tmp.html",
"/tmp/a.html",
"/a%3cd.html",
"/a%3Cd.html",
"/a%2fb.html",
"/~joe/index.html",
"/a/b.html",
]
self.check_urls(good, bad, 'figtree')
self.check_urls(good, bad, 'FigTree/1.0 Robot libwww-perl/5.04')
self.check_urls(good, bad, "figtree")
self.check_urls(good, bad, "FigTree/1.0 Robot libwww-perl/5.04")
def test_access5(self):
lines = [
@ -250,15 +257,21 @@ class TestRobotsTxt(unittest.TestCase):
"Disallow: /tmp/",
"Disallow: /a%3Cd.html",
"Disallow: /a/b.html",
"Disallow: /%7Ejoe/index.html" if sys.version_info < (3, 7) \
"Disallow: /%7Ejoe/index.html"
if sys.version_info < (3, 7)
else "Disallow: /~joe/index.html",
]
self.rp.parse(lines)
self.assertEqual(str(self.rp), "\n".join(lines2))
good = ['/tmp'] # XFAIL: '/a%2fb.html'
bad = ['/tmp/', '/tmp/a.html',
'/a%3cd.html', '/a%3Cd.html', "/a/b.html",
'/%7Ejoe/index.html']
good = ["/tmp"] # XFAIL: '/a%2fb.html'
bad = [
"/tmp/",
"/tmp/a.html",
"/a%3cd.html",
"/a%3Cd.html",
"/a/b.html",
"/%7Ejoe/index.html",
]
self.check_urls(good, bad)
def test_access6(self):
@ -267,8 +280,8 @@ class TestRobotsTxt(unittest.TestCase):
"Disallow: /.",
]
self.rp.parse(lines)
good = ['/foo.html']
bad = [] # Bug report says "/" should be denied, but that is not in the RFC
good = ["/foo.html"]
bad = [] # Bug report says "/" should be denied, but that is not in the RFC
self.check_urls(good, bad)
def test_access7(self):

View file

@ -35,14 +35,14 @@ class TestStrFormat(unittest.TestCase):
self.assertEqual(u(""), "")
self.assertEqual(u(None), None)
self.assertEqual(u("'"), "'")
self.assertEqual(u("\""), "\"")
self.assertEqual(u("\"\""), "")
self.assertEqual(u('"'), '"')
self.assertEqual(u('""'), "")
self.assertEqual(u("''"), "")
self.assertEqual(u("'a'"), "a")
self.assertEqual(u("'a\"'"), "a\"")
self.assertEqual(u("'\"a'"), "\"a")
self.assertEqual(u('"a\'"'), 'a\'')
self.assertEqual(u('"\'a"'), '\'a')
self.assertEqual(u("'a\"'"), 'a"')
self.assertEqual(u("'\"a'"), '"a')
self.assertEqual(u('"a\'"'), "a'")
self.assertEqual(u('"\'a"'), "'a")
self.assertEqual(u("'a'", matching=True), "a")
self.assertEqual(u('"a"', matching=True), "a")
# even mis-matching quotes should be removed...
@ -55,18 +55,17 @@ class TestStrFormat(unittest.TestCase):
def test_wrap(self):
# Test line wrapping.
wrap = linkcheck.strformat.wrap
s = "11%(sep)s22%(sep)s33%(sep)s44%(sep)s55" % {'sep': os.linesep}
s = "11%(sep)s22%(sep)s33%(sep)s44%(sep)s55" % {"sep": os.linesep}
# testing width <= 0
self.assertEqual(wrap(s, -1), s)
self.assertEqual(wrap(s, 0), s)
l = len(os.linesep)
gap = " "
s2 = "11%(gap)s22%(sep)s33%(gap)s44%(sep)s55" % \
{'sep': os.linesep, 'gap': gap}
s2 = "11%(gap)s22%(sep)s33%(gap)s44%(sep)s55" % {"sep": os.linesep, "gap": gap}
# splitting lines
self.assertEqual(wrap(s2, 2), s)
# combining lines
self.assertEqual(wrap(s, 4+l), s2)
self.assertEqual(wrap(s, 4 + l), s2)
# misc
self.assertEqual(wrap(s, -1), s)
self.assertEqual(wrap(s, 0), s)
@ -88,13 +87,11 @@ class TestStrFormat(unittest.TestCase):
self.assertEqual(linkcheck.strformat.strsize(2), "2B")
self.assertEqual(linkcheck.strformat.strsize(1023, grouping=False), "1023B")
self.assertEqual(linkcheck.strformat.strsize(1024), "1KB")
self.assertEqual(linkcheck.strformat.strsize(1024*25), "25.00KB")
self.assertEqual(linkcheck.strformat.strsize(1024*1024), "1.00MB")
self.assertEqual(linkcheck.strformat.strsize(1024*1024*11), "11.0MB")
self.assertEqual(linkcheck.strformat.strsize(1024*1024*1024),
"1.00GB")
self.assertEqual(linkcheck.strformat.strsize(1024*1024*1024*14),
"14.0GB")
self.assertEqual(linkcheck.strformat.strsize(1024 * 25), "25.00KB")
self.assertEqual(linkcheck.strformat.strsize(1024 * 1024), "1.00MB")
self.assertEqual(linkcheck.strformat.strsize(1024 * 1024 * 11), "11.0MB")
self.assertEqual(linkcheck.strformat.strsize(1024 * 1024 * 1024), "1.00GB")
self.assertEqual(linkcheck.strformat.strsize(1024 * 1024 * 1024 * 14), "14.0GB")
def test_is_ascii(self):
self.assertTrue(linkcheck.strformat.is_ascii("abcd./"))
@ -103,7 +100,7 @@ class TestStrFormat(unittest.TestCase):
def test_indent(self):
s = "bla"
self.assertEqual(linkcheck.strformat.indent(s, ""), s)
self.assertEqual(linkcheck.strformat.indent(s, " "), " "+s)
self.assertEqual(linkcheck.strformat.indent(s, " "), " " + s)
def test_stripurl(self):
self.assertEqual(linkcheck.strformat.stripurl("a\tb"), "a\tb")
@ -121,7 +118,7 @@ class TestStrFormat(unittest.TestCase):
def test_strtime(self):
zone = linkcheck.strformat.strtimezone()
t = linkcheck.strformat.strtime(0, func=time.gmtime)
self.assertEqual(t, "1970-01-01 00:00:00"+zone)
self.assertEqual(t, "1970-01-01 00:00:00" + zone)
def test_duration(self):
duration = linkcheck.strformat.strduration
@ -132,8 +129,8 @@ class TestStrFormat(unittest.TestCase):
self.assertEqual(duration(2), "00:02")
self.assertEqual(duration(60), "01:00")
self.assertEqual(duration(120), "02:00")
self.assertEqual(duration(60*60), "01:00:00")
self.assertEqual(duration(60*60*24), "24:00:00")
self.assertEqual(duration(60 * 60), "01:00:00")
self.assertEqual(duration(60 * 60 * 24), "24:00:00")
def test_duration_long(self):
duration = lambda s: linkcheck.strformat.strduration_long(s, do_translate=False)
@ -144,11 +141,12 @@ class TestStrFormat(unittest.TestCase):
self.assertEqual(duration(2), "2 seconds")
self.assertEqual(duration(60), "1 minute")
self.assertEqual(duration(120), "2 minutes")
self.assertEqual(duration(60*60), "1 hour")
self.assertEqual(duration(60*60*24), "1 day")
self.assertEqual(duration(60*60*24*365), "1 year")
self.assertEqual(duration(60*60*24*365 + 60*60*24 + 2),
"1 year, 1 day")
self.assertEqual(duration(60 * 60), "1 hour")
self.assertEqual(duration(60 * 60 * 24), "1 day")
self.assertEqual(duration(60 * 60 * 24 * 365), "1 year")
self.assertEqual(
duration(60 * 60 * 24 * 365 + 60 * 60 * 24 + 2), "1 year, 1 day"
)
def test_linenumber(self):
get_line_number = linkcheck.strformat.get_line_number
@ -158,8 +156,8 @@ class TestStrFormat(unittest.TestCase):
def test_encoding(self):
is_encoding = linkcheck.strformat.is_encoding
self.assertTrue(is_encoding('ascii'))
self.assertFalse(is_encoding('hulla'))
self.assertTrue(is_encoding("ascii"))
self.assertFalse(is_encoding("hulla"))
def test_unicode_safe(self):
unicode_safe = linkcheck.strformat.unicode_safe

View file

@ -30,10 +30,10 @@ import linkcheck.url
# '-': '-',
# All portions of the URI must be utf-8 encoded NFC form Unicode strings
#valid: http://example.com/?q=%C3%87 (C-cedilla U+00C7)
#valid: http://example.com/?q=%E2%85%A0 (Roman numeral one U+2160)
#invalid: http://example.com/?q=%C7 (C-cedilla ISO-8859-1)
#invalid: http://example.com/?q=C%CC%A7
# valid: http://example.com/?q=%C3%87 (C-cedilla U+00C7)
# valid: http://example.com/?q=%E2%85%A0 (Roman numeral one U+2160)
# invalid: http://example.com/?q=%C7 (C-cedilla ISO-8859-1)
# invalid: http://example.com/?q=C%CC%A7
# (Latin capital letter C + Combining cedilla U+0327)
@ -45,11 +45,14 @@ class TestUrl(unittest.TestCase):
"""Test url norming and quoting."""
def urlnormtest(self, url, nurl, encoding=None):
self.assertFalse(linkcheck.url.url_needs_quoting(nurl),
"Result URL %r must not need quoting" % nurl)
self.assertFalse(
linkcheck.url.url_needs_quoting(nurl),
"Result URL %r must not need quoting" % nurl,
)
nurl1 = url_norm(url, encoding=encoding)
self.assertFalse(linkcheck.url.url_needs_quoting(nurl1),
"Normed URL %r needs quoting" % nurl)
self.assertFalse(
linkcheck.url.url_needs_quoting(nurl1), "Normed URL %r needs quoting" % nurl
)
self.assertEqual(nurl1, nurl)
def test_wayback(self):
@ -58,10 +61,11 @@ class TestUrl(unittest.TestCase):
def test_pathattack(self):
# Windows winamp path attack prevention.
url = "http://server/..%5c..%5c..%5c..%5c..%5c..%5c..%5c.."\
"%5ccskin.zip"
url = "http://server/..%5c..%5c..%5c..%5c..%5c..%5c..%5c.." "%5ccskin.zip"
nurl = "http://server/cskin.zip"
self.assertEqual(linkcheck.url.url_quote(url_norm(url), encoding="iso-8859-1"), nurl)
self.assertEqual(
linkcheck.url.url_quote(url_norm(url), encoding="iso-8859-1"), nurl
)
def test_safe_patterns(self):
is_safe_host = linkcheck.url.is_safe_host
@ -76,6 +80,7 @@ class TestUrl(unittest.TestCase):
def test_url_quote(self):
def url_quote(url):
return linkcheck.url.url_quote(url, encoding="utf-8")
url = "http://a:80/bcd"
self.assertEqual(url_quote(url), url)
url = "http://a:80/bcd?"
@ -95,14 +100,15 @@ class TestUrl(unittest.TestCase):
def test_norm_quote(self):
# Test url norm quoting.
url = "http://groups.google.com/groups?hl=en&lr&ie=UTF-8&"\
"threadm=3845B54D.E546F9BD%40monmouth.com&rnum=2&"\
"prev=/groups%3Fq%3Dlogitech%2Bwingman%2Bextreme%2Bdigital"\
"%2B3d%26hl%3Den%26lr%3D%26ie%3DUTF-8%26selm%3D3845B54D.E5"\
"46F9BD%2540monmouth.com%26rnum%3D2"
url = (
"http://groups.google.com/groups?hl=en&lr&ie=UTF-8&"
"threadm=3845B54D.E546F9BD%40monmouth.com&rnum=2&"
"prev=/groups%3Fq%3Dlogitech%2Bwingman%2Bextreme%2Bdigital"
"%2B3d%26hl%3Den%26lr%3D%26ie%3DUTF-8%26selm%3D3845B54D.E5"
"46F9BD%2540monmouth.com%26rnum%3D2"
)
self.urlnormtest(url, url)
url = "http://redirect.alexa.com/redirect?"\
"http://www.offeroptimizer.com"
url = "http://redirect.alexa.com/redirect?" "http://www.offeroptimizer.com"
self.urlnormtest(url, url)
url = "http://www.lesgensducinema.com/photo/Philippe%20Nahon.jpg"
self.urlnormtest(url, url)
@ -239,125 +245,127 @@ class TestUrl(unittest.TestCase):
def test_norm_path_relative_dots(self):
# Test url norm relative path handling with dots.
# normalize redundant path segments
url = '/foo/bar/.'
nurl = '/foo/bar/'
url = "/foo/bar/."
nurl = "/foo/bar/"
self.urlnormtest(url, nurl)
url = '/foo/bar/./'
nurl = '/foo/bar/'
url = "/foo/bar/./"
nurl = "/foo/bar/"
self.urlnormtest(url, nurl)
url = '/foo/bar/..'
nurl = '/foo/'
url = "/foo/bar/.."
nurl = "/foo/"
self.urlnormtest(url, nurl)
url = '/foo/bar/../'
nurl = '/foo/'
url = "/foo/bar/../"
nurl = "/foo/"
self.urlnormtest(url, nurl)
url = '/foo/bar/../baz'
nurl = '/foo/baz'
url = "/foo/bar/../baz"
nurl = "/foo/baz"
self.urlnormtest(url, nurl)
url = '/foo/bar/../..'
nurl = '/'
url = "/foo/bar/../.."
nurl = "/"
self.urlnormtest(url, nurl)
url = '/foo/bar/../../'
nurl = '/'
url = "/foo/bar/../../"
nurl = "/"
self.urlnormtest(url, nurl)
url = '/foo/bar/../../baz'
nurl = '/baz'
url = "/foo/bar/../../baz"
nurl = "/baz"
self.urlnormtest(url, nurl)
url = '/foo/bar/../../../baz'
nurl = '/baz'
url = "/foo/bar/../../../baz"
nurl = "/baz"
self.urlnormtest(url, nurl)
url = '/foo/bar/../../../../baz'
nurl = '/baz'
url = "/foo/bar/../../../../baz"
nurl = "/baz"
self.urlnormtest(url, nurl)
url = '/./foo'
nurl = '/foo'
url = "/./foo"
nurl = "/foo"
self.urlnormtest(url, nurl)
url = '/../foo'
nurl = '/foo'
url = "/../foo"
nurl = "/foo"
self.urlnormtest(url, nurl)
url = '/foo.'
url = "/foo."
nurl = url
self.urlnormtest(url, nurl)
url = '/.foo'
url = "/.foo"
nurl = url
self.urlnormtest(url, nurl)
url = '/foo..'
url = "/foo.."
nurl = url
self.urlnormtest(url, nurl)
url = '/..foo'
url = "/..foo"
nurl = url
self.urlnormtest(url, nurl)
url = '/./../foo'
nurl = '/foo'
url = "/./../foo"
nurl = "/foo"
self.urlnormtest(url, nurl)
url = '/./foo/.'
nurl = '/foo/'
url = "/./foo/."
nurl = "/foo/"
self.urlnormtest(url, nurl)
url = '/foo/./bar'
nurl = '/foo/bar'
url = "/foo/./bar"
nurl = "/foo/bar"
self.urlnormtest(url, nurl)
url = '/foo/../bar'
nurl = '/bar'
url = "/foo/../bar"
nurl = "/bar"
self.urlnormtest(url, nurl)
url = '../../../images/miniXmlButton.gif'
url = "../../../images/miniXmlButton.gif"
nurl = url
self.urlnormtest(url, nurl)
url = '/a..b/../images/miniXmlButton.gif'
nurl = '/images/miniXmlButton.gif'
url = "/a..b/../images/miniXmlButton.gif"
nurl = "/images/miniXmlButton.gif"
self.urlnormtest(url, nurl)
url = '/.a.b/../foo/'
nurl = '/foo/'
url = "/.a.b/../foo/"
nurl = "/foo/"
self.urlnormtest(url, nurl)
url = '/..a.b/../foo/'
nurl = '/foo/'
url = "/..a.b/../foo/"
nurl = "/foo/"
self.urlnormtest(url, nurl)
url = 'b/../../foo/'
nurl = '../foo/'
url = "b/../../foo/"
nurl = "../foo/"
self.urlnormtest(url, nurl)
url = './foo'
nurl = 'foo'
url = "./foo"
nurl = "foo"
self.urlnormtest(url, nurl)
def test_norm_path_relative_slashes(self):
# Test url norm relative path handling with slashes.
url = '/foo//'
nurl = '/foo/'
url = "/foo//"
nurl = "/foo/"
self.urlnormtest(url, nurl)
url = '/foo///bar//'
nurl = '/foo/bar/'
url = "/foo///bar//"
nurl = "/foo/bar/"
self.urlnormtest(url, nurl)
def test_mail_url(self):
# Test mailto URLs.
# no netloc and no path
url = 'mailto:'
url = "mailto:"
nurl = url
self.urlnormtest(url, nurl)
# standard email
url = 'mailto:user@www.example.org'
url = "mailto:user@www.example.org"
nurl = url
self.urlnormtest(url, nurl)
# emails with subject
url = 'mailto:user@www.example.org?subject=a_b'
url = "mailto:user@www.example.org?subject=a_b"
nurl = url
self.urlnormtest(url, nurl)
url = 'mailto:business.inquiries@designingpatterns.com?subject=Business%20Inquiry'
url = (
"mailto:business.inquiries@designingpatterns.com?subject=Business%20Inquiry"
)
nurl = url
self.urlnormtest(url, nurl)
def test_norm_other(self):
# Test norming of other schemes.
url = 'news:'
nurl = 'news:'
url = "news:"
nurl = "news:"
self.urlnormtest(url, nurl)
url = 'snews:'
nurl = 'snews://'
url = "snews:"
nurl = "snews://"
self.urlnormtest(url, nurl)
url = 'nntp:'
nurl = 'nntp://'
url = "nntp:"
nurl = "nntp://"
self.urlnormtest(url, nurl)
url = "news:!$%&/()="
nurl = 'news:!%24%25%26/()='
nurl = "news:!%24%25%26/()="
self.urlnormtest(url, nurl)
url = "news:comp.infosystems.www.servers.unix"
nurl = url
@ -367,9 +375,9 @@ class TestUrl(unittest.TestCase):
nurl = url
self.urlnormtest(url, nurl)
# ldap url # XXX failing on Travis build
#url = "ldap://[2001:db8::7]/c=GB?objectClass?one"
#nurl = "ldap://%5B2001:db8::7%5D/c=GB?objectClass?one"
#self.urlnormtest(url, nurl)
# url = "ldap://[2001:db8::7]/c=GB?objectClass?one"
# nurl = "ldap://%5B2001:db8::7%5D/c=GB?objectClass?one"
# self.urlnormtest(url, nurl)
url = "tel:+1-816-555-1212"
nurl = url
self.urlnormtest(url, nurl)
@ -409,9 +417,9 @@ class TestUrl(unittest.TestCase):
url = "file:///a/ä.txt"
nurl = "file:///a/%C3%A4.txt"
self.urlnormtest(url, nurl)
#url = "file:///\u041c\u043e\u0448\u043a\u043e\u0432\u0430.bin"
#nurl = "file:///a.bin" # XXX
#self.urlnormtest(url, nurl)
# url = "file:///\u041c\u043e\u0448\u043a\u043e\u0432\u0430.bin"
# nurl = "file:///a.bin" # XXX
# self.urlnormtest(url, nurl)
def test_norm_invalid(self):
url = "äöü?:"
@ -471,7 +479,7 @@ class TestUrl(unittest.TestCase):
self.assertTrue(not linkcheck.url.url_is_absolute(url), repr(url))
def test_nopathquote_chars(self):
if os.name == 'nt':
if os.name == "nt":
url = "file:///c|/msys/"
nurl = url
self.assertEqual(url_norm(url), nurl)
@ -482,12 +490,12 @@ class TestUrl(unittest.TestCase):
def test_idn_encoding(self):
# Test idna encoding.
url = 'www.öko.de'
url = "www.öko.de"
idna_encode = linkcheck.url.idna_encode
encurl, is_idn = idna_encode(url)
self.assertTrue(is_idn)
self.assertTrue(encurl)
url = ''
url = ""
encurl, is_idn = idna_encode(url)
self.assertFalse(is_idn)
self.assertFalse(encurl)
@ -531,7 +539,7 @@ class TestUrl(unittest.TestCase):
self.assertEqual(linkcheck.url.url_parse_query(u, encoding="utf-8"), u)
def test_long_cgi(self):
u = "/test%s;" % ("?a="*1000)
u = "/test%s;" % ("?a=" * 1000)
self.assertEqual(linkcheck.url.url_parse_query(u, encoding="utf-8"), u)
def test_port(self):
@ -560,7 +568,7 @@ class TestUrl(unittest.TestCase):
@need_network
def test_get_content(self):
linkcheck.url.get_content('http://www.debian.org/')
linkcheck.url.get_content("http://www.debian.org/")
def test_duplicate_urls(self):
is_dup = linkcheck.url.is_duplicate_content_url

View file

@ -29,7 +29,7 @@ def get_test_aggregate():
Initialize a test configuration object.
"""
config = linkcheck.configuration.Configuration()
config['logger'] = config.logger_new('none')
config["logger"] = config.logger_new("none")
return linkcheck.director.get_aggregate(config)
@ -45,29 +45,28 @@ class TestUrlBuild(unittest.TestCase):
aggregate = get_test_aggregate()
o = get_url_from(base_url, recursion_level, aggregate, parent_url=parent_url)
o.build_url()
self.assertEqual(o.url, 'http://foo')
self.assertEqual(o.url, "http://foo")
def test_urljoin(self):
parent_url = "http://localhost:8001/test"
base_url = ";param=value"
res = linkcheck.checker.urlbase.urljoin(parent_url, base_url)
self.assertEqual(res, 'http://localhost:8001/;param=value')
self.assertEqual(res, "http://localhost:8001/;param=value")
def test_urljoin_file(self):
parent_url = "file:///a/b.html"
base_url = "?c=d"
recursion_level = 0
aggregate = get_test_aggregate()
o = get_url_from(base_url, recursion_level,
aggregate, parent_url=parent_url)
o = get_url_from(base_url, recursion_level, aggregate, parent_url=parent_url)
o.build_url()
self.assertEqual(o.url, parent_url)
def test_http_build2(self):
parent_url = 'http://example.org/test?a=b&c=d'
base_url = '#usemap'
parent_url = "http://example.org/test?a=b&c=d"
base_url = "#usemap"
recursion_level = 0
aggregate = get_test_aggregate()
o = get_url_from(base_url, recursion_level, aggregate, parent_url=parent_url)
o.build_url()
self.assertEqual(o.url, parent_url+base_url)
self.assertEqual(o.url, parent_url + base_url)