Merge pull request #319 from linkchecker/nonascii-regression

Fix TypeError: string arg required in find_links()
This commit is contained in:
Marius Gedminas 2019-10-21 18:02:16 +03:00 committed by GitHub
commit ade5a5c399
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
7 changed files with 23 additions and 3 deletions

View file

@@ -130,7 +130,7 @@ def find_links (url_data, callback, tags):
handler.parser = parser
# parse
try:
-content = url_data.get_content()
+content = url_data.get_raw_content()
with parse_mutex:
parser.feed(content)
parser.flush()

3
pytest.ini Normal file
View file

@@ -0,0 +1,3 @@
[pytest]
testpaths = tests
addopts = -ra --tb=short

View file

@@ -231,10 +231,14 @@ class LinkCheckTest (unittest.TestCase):
url_data = get_url_from(url, 0, aggregate, extern=(0, 0))
aggregate.urlqueue.put(url_data)
linkcheck.director.check_urls(aggregate)
-diff = aggregate.config['logger'].diff
+logger = aggregate.config['logger']
+diff = logger.diff
if diff:
msg = str_text(os.linesep).join([url] + diff)
self.fail_unicode(msg)
+if logger.stats.internal_errors:
+self.fail_unicode("%d internal errors occurred!"
+% logger.stats.internal_errors)
def fail_unicode (self, msg):
"""Print encoded fail message."""

View file

@@ -0,0 +1,5 @@
<!DOCTYPE html>
<html lang="en">
<meta charset="utf-8">
<p>Some text — but with an em-dash.</p>
</html>

View file

@@ -0,0 +1,5 @@
url file://%(curdir)s/%(datadir)s/utf8.html
cache key file://%(curdir)s/%(datadir)s/utf8.html
real url file://%(curdir)s/%(datadir)s/utf8.html
name %(datadir)s/utf8.html
valid

View file

@@ -33,6 +33,9 @@ class TestMisc (LinkCheckTest):
def test_html5 (self):
self.file_test("html5.html")
+def test_utf8 (self):
+self.file_test("utf8.html")
@need_network
def test_archive (self):
self.file_test("archive.html")

View file

@@ -12,6 +12,6 @@ deps =
miniboa
biplist
commands =
-py.test {posargs:--tb=short --cov=linkcheck tests}
+pytest {posargs:--cov=linkcheck}
setenv =
LC_ALL=en_US.utf-8