diff --git a/linkcheck/parser/__init__.py b/linkcheck/parser/__init__.py index 7f6ad51b..45f8e84d 100644 --- a/linkcheck/parser/__init__.py +++ b/linkcheck/parser/__init__.py @@ -130,7 +130,7 @@ def find_links (url_data, callback, tags): handler.parser = parser # parse try: - content = url_data.get_content() + content = url_data.get_raw_content() with parse_mutex: parser.feed(content) parser.flush() diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..f78fcd79 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +testpaths = tests +addopts = -ra --tb=short diff --git a/tests/checker/__init__.py b/tests/checker/__init__.py index b5fe7d80..674e5e86 100644 --- a/tests/checker/__init__.py +++ b/tests/checker/__init__.py @@ -231,10 +231,14 @@ class LinkCheckTest (unittest.TestCase): url_data = get_url_from(url, 0, aggregate, extern=(0, 0)) aggregate.urlqueue.put(url_data) linkcheck.director.check_urls(aggregate) - diff = aggregate.config['logger'].diff + logger = aggregate.config['logger'] + diff = logger.diff if diff: msg = str_text(os.linesep).join([url] + diff) self.fail_unicode(msg) + if logger.stats.internal_errors: + self.fail_unicode("%d internal errors occurred!" + % logger.stats.internal_errors) def fail_unicode (self, msg): """Print encoded fail message.""" diff --git a/tests/checker/data/utf8.html b/tests/checker/data/utf8.html new file mode 100644 index 00000000..c369b145 --- /dev/null +++ b/tests/checker/data/utf8.html @@ -0,0 +1,5 @@ + + + +
Some text — but with an em-dash.
+ diff --git a/tests/checker/data/utf8.html.result b/tests/checker/data/utf8.html.result new file mode 100644 index 00000000..814ffd71 --- /dev/null +++ b/tests/checker/data/utf8.html.result @@ -0,0 +1,5 @@ +url file://%(curdir)s/%(datadir)s/utf8.html +cache key file://%(curdir)s/%(datadir)s/utf8.html +real url file://%(curdir)s/%(datadir)s/utf8.html +name %(datadir)s/utf8.html +valid diff --git a/tests/checker/test_misc.py b/tests/checker/test_misc.py index f9591f9d..24d18034 100644 --- a/tests/checker/test_misc.py +++ b/tests/checker/test_misc.py @@ -33,6 +33,9 @@ class TestMisc (LinkCheckTest): def test_html5 (self): self.file_test("html5.html") + def test_utf8 (self): + self.file_test("utf8.html") + @need_network def test_archive (self): self.file_test("archive.html") diff --git a/tox.ini b/tox.ini index 92c6507f..c14d398f 100644 --- a/tox.ini +++ b/tox.ini @@ -12,6 +12,6 @@ deps = miniboa biplist commands = - py.test {posargs:--tb=short --cov=linkcheck tests} + pytest {posargs:--cov=linkcheck} setenv = LC_ALL=en_US.utf-8