diff --git a/linkcheck/checker/httpurl.py b/linkcheck/checker/httpurl.py index 809a6d0e..498e279f 100644 --- a/linkcheck/checker/httpurl.py +++ b/linkcheck/checker/httpurl.py @@ -88,7 +88,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport): parser.encoding = self.charset # parse try: - parser.feed(self.get_content()) + parser.feed(self.get_raw_content()) parser.flush() except linkparse.StopParse as msg: log.debug(LOG_CHECK, "Stopped parsing: %s", msg) diff --git a/tests/checker/data/http_utf8.html b/tests/checker/data/http_utf8.html new file mode 100644 index 00000000..c369b145 --- /dev/null +++ b/tests/checker/data/http_utf8.html @@ -0,0 +1,5 @@ + + + +

Some text — but with an em-dash.

+ diff --git a/tests/checker/data/http_utf8.html.result b/tests/checker/data/http_utf8.html.result new file mode 100644 index 00000000..437e540b --- /dev/null +++ b/tests/checker/data/http_utf8.html.result @@ -0,0 +1,4 @@ +url http://localhost:%(port)d/%(datadir)s/http_utf8.html +cache key http://localhost:%(port)d/%(datadir)s/http_utf8.html +real url http://localhost:%(port)d/%(datadir)s/http_utf8.html +valid diff --git a/tests/checker/test_http.py b/tests/checker/test_http.py index 76662c3c..e73f3f2e 100644 --- a/tests/checker/test_http.py +++ b/tests/checker/test_http.py @@ -39,7 +39,7 @@ class TestHttp (HttpServerTest): self.file_test("http_slash.html", confargs=confargs) self.file_test("http.xhtml", confargs=confargs) self.file_test("http_file.html", confargs=confargs) - self.file_test("utf8.html", confargs=confargs) + self.file_test("http_utf8.html", confargs=confargs) def test_status(self): for status in sorted(self.handler.responses.keys()):