Python3: fix robotparser

This commit is contained in:
Petr Dlouhý 2018-01-05 20:59:34 +01:00 committed by Chris Mayo
parent 5179e47c52
commit 8a294be95f
2 changed files with 6 additions and 6 deletions

View file

@@ -32,7 +32,7 @@ import time
import requests
from . import log, LOG_CHECK, configuration
from . import log, LOG_CHECK, configuration, url as urlutil
__all__ = ["RobotFileParser"]
@@ -104,7 +104,7 @@ class RobotFileParser (object):
response.raise_for_status()
content_type = response.headers.get('content-type')
if content_type and content_type.lower().startswith('text/plain'):
self.parse(response.iter_lines())
self.parse(response.iter_lines(decode_unicode=True))
else:
log.debug(LOG_CHECK, "%r allow all (no text content)", self.url)
self.allow_all = True
@@ -281,7 +281,7 @@ class RuleLine (object):
# an empty value means allow all
allowance = True
path = '/'
self.path = parse.quote(path)
self.path = urlutil.url_quote_part(path)
self.allowance = allowance
def applies_to (self, path):

View file

@@ -134,7 +134,7 @@ class TestLogger (linkcheck.logger._Logger):
if not isinstance(line, str_text):
# The ---, +++ and @@ lines from diff format are ascii encoded.
# Make them unicode.
line = unicode(line, "ascii", "replace")
line = str_text(line, "ascii", "replace")
self.diff.append(line)
@@ -233,7 +233,7 @@ class LinkCheckTest (unittest.TestCase):
linkcheck.director.check_urls(aggregate)
diff = aggregate.config['logger'].diff
if diff:
msg = unicode(os.linesep).join([url] + diff)
msg = str_text(os.linesep).join([url] + diff)
self.fail_unicode(msg)
def fail_unicode (self, msg):
@@ -263,7 +263,7 @@ class LinkCheckTest (unittest.TestCase):
if diff:
l = [u"Differences found testing %s" % url]
l.extend(x.rstrip() for x in diff[2:])
self.fail_unicode(unicode(os.linesep).join(l))
self.fail_unicode(str_text(os.linesep).join(l))
class MailTest (LinkCheckTest):