Merge pull request #325 from linkchecker/type-error-in-robot-parser

Fix TypeError: string arg required in content_allows_robots()
This commit is contained in:
Marius Gedminas 2019-10-22 18:07:31 +03:00 committed by GitHub
commit c6de64978c
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 11 additions and 1 deletion

View file

@@ -88,7 +88,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
parser.encoding = self.charset
# parse
try:
parser.feed(self.get_content())
parser.feed(self.get_raw_content())
parser.flush()
except linkparse.StopParse as msg:
log.debug(LOG_CHECK, "Stopped parsing: %s", msg)

View file

@@ -0,0 +1,5 @@
<!DOCTYPE html>
<html lang="en">
<meta charset="utf-8">
<p>Some text — but with an em-dash.</p>
</html>

View file

@@ -0,0 +1,4 @@
url http://localhost:%(port)d/%(datadir)s/http_utf8.html
cache key http://localhost:%(port)d/%(datadir)s/http_utf8.html
real url http://localhost:%(port)d/%(datadir)s/http_utf8.html
valid

View file

@@ -39,6 +39,7 @@ class TestHttp (HttpServerTest):
self.file_test("http_slash.html", confargs=confargs)
self.file_test("http.xhtml", confargs=confargs)
self.file_test("http_file.html", confargs=confargs)
self.file_test("http_utf8.html", confargs=confargs)
def test_status(self):
for status in sorted(self.handler.responses.keys()):