From 42c75b5ef9dc3e6f760d3c8964ff8dfdb68e676d Mon Sep 17 00:00:00 2001 From: Marius Gedminas Date: Mon, 21 Oct 2019 17:42:29 +0300 Subject: [PATCH 1/3] Move some pytest options into pytest.ini This is so that I can run `tox -- -n 8` to run the tests in parallel, or `tox -- tests/checker/test_misc.py::TestMisc::test_html5` to run just a single test, without having to repeat all the other options. I haven't moved --cov=linkcheck because I don't want coverage results when I'm limiting the test run to a single test (they just make the interesting bit -- the test result itself -- scroll up). I've also added -ra to the default option list because when several tests fail, I'd like to see a list of their names in one place, not spread out between the huge tracebacks. --- pytest.ini | 3 +++ tox.ini | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) create mode 100644 pytest.ini diff --git a/pytest.ini b/pytest.ini new file mode 100644 index 00000000..f78fcd79 --- /dev/null +++ b/pytest.ini @@ -0,0 +1,3 @@ +[pytest] +testpaths = tests +addopts = -ra --tb=short diff --git a/tox.ini b/tox.ini index 92c6507f..c14d398f 100644 --- a/tox.ini +++ b/tox.ini @@ -12,6 +12,6 @@ deps = miniboa biplist commands = - py.test {posargs:--tb=short --cov=linkcheck tests} + pytest {posargs:--cov=linkcheck} setenv = LC_ALL=en_US.utf-8 From a4967fe92c38d6715ca3f7585d9127a6499fe066 Mon Sep 17 00:00:00 2001 From: Marius Gedminas Date: Mon, 21 Oct 2019 17:45:18 +0300 Subject: [PATCH 2/3] Add a regression test for issue #317 The important bit was making the `file_test` helper not ignore internal errors. 
--- tests/checker/__init__.py | 6 +++++- tests/checker/data/utf8.html | 5 +++++ tests/checker/data/utf8.html.result | 0 tests/checker/test_misc.py | 3 +++ 4 files changed, 13 insertions(+), 1 deletion(-) create mode 100644 tests/checker/data/utf8.html create mode 100644 tests/checker/data/utf8.html.result diff --git a/tests/checker/__init__.py b/tests/checker/__init__.py index b5fe7d80..674e5e86 100644 --- a/tests/checker/__init__.py +++ b/tests/checker/__init__.py @@ -231,10 +231,14 @@ class LinkCheckTest (unittest.TestCase): url_data = get_url_from(url, 0, aggregate, extern=(0, 0)) aggregate.urlqueue.put(url_data) linkcheck.director.check_urls(aggregate) - diff = aggregate.config['logger'].diff + logger = aggregate.config['logger'] + diff = logger.diff if diff: msg = str_text(os.linesep).join([url] + diff) self.fail_unicode(msg) + if logger.stats.internal_errors: + self.fail_unicode("%d internal errors occurred!" + % logger.stats.internal_errors) def fail_unicode (self, msg): """Print encoded fail message.""" diff --git a/tests/checker/data/utf8.html b/tests/checker/data/utf8.html new file mode 100644 index 00000000..c369b145 --- /dev/null +++ b/tests/checker/data/utf8.html @@ -0,0 +1,5 @@ + + + +

Some text — but with an em-dash.

+ diff --git a/tests/checker/data/utf8.html.result b/tests/checker/data/utf8.html.result new file mode 100644 index 00000000..e69de29b diff --git a/tests/checker/test_misc.py b/tests/checker/test_misc.py index f9591f9d..24d18034 100644 --- a/tests/checker/test_misc.py +++ b/tests/checker/test_misc.py @@ -33,6 +33,9 @@ class TestMisc (LinkCheckTest): def test_html5 (self): self.file_test("html5.html") + def test_utf8 (self): + self.file_test("utf8.html") + @need_network def test_archive (self): self.file_test("archive.html") From 84dbb5d6036cb61ba6a3ea6aa5ad9859b560702e Mon Sep 17 00:00:00 2001 From: Marius Gedminas Date: Mon, 21 Oct 2019 17:47:46 +0300 Subject: [PATCH 3/3] Fix TypeError: string arg required in find_links() Fixes #317. --- linkcheck/parser/__init__.py | 2 +- tests/checker/data/utf8.html.result | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/linkcheck/parser/__init__.py b/linkcheck/parser/__init__.py index 7f6ad51b..45f8e84d 100644 --- a/linkcheck/parser/__init__.py +++ b/linkcheck/parser/__init__.py @@ -130,7 +130,7 @@ def find_links (url_data, callback, tags): handler.parser = parser # parse try: - content = url_data.get_content() + content = url_data.get_raw_content() with parse_mutex: parser.feed(content) parser.flush() diff --git a/tests/checker/data/utf8.html.result b/tests/checker/data/utf8.html.result index e69de29b..814ffd71 100644 --- a/tests/checker/data/utf8.html.result +++ b/tests/checker/data/utf8.html.result @@ -0,0 +1,5 @@ +url file://%(curdir)s/%(datadir)s/utf8.html +cache key file://%(curdir)s/%(datadir)s/utf8.html +real url file://%(curdir)s/%(datadir)s/utf8.html +name %(datadir)s/utf8.html +valid