Fix TypeError: string arg required in content_allows_robots()

See #323 and #317.
This commit is contained in:
Marius Gedminas 2019-10-22 14:12:38 +03:00
parent 6a9ab5ae44
commit 58b0d5aaae
4 changed files with 11 additions and 2 deletions

View file

@@ -88,7 +88,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
parser.encoding = self.charset
# parse
try:
parser.feed(self.get_content())
parser.feed(self.get_raw_content())
parser.flush()
except linkparse.StopParse as msg:
log.debug(LOG_CHECK, "Stopped parsing: %s", msg)

View file

@@ -0,0 +1,5 @@
<!DOCTYPE html>
<html lang="en">
<meta charset="utf-8">
<p>Some text — but with an em-dash.</p>
</html>

View file

@@ -0,0 +1,4 @@
url http://localhost:%(port)d/%(datadir)s/http_utf8.html
cache key http://localhost:%(port)d/%(datadir)s/http_utf8.html
real url http://localhost:%(port)d/%(datadir)s/http_utf8.html
valid

View file

@@ -39,7 +39,7 @@ class TestHttp (HttpServerTest):
self.file_test("http_slash.html", confargs=confargs)
self.file_test("http.xhtml", confargs=confargs)
self.file_test("http_file.html", confargs=confargs)
self.file_test("utf8.html", confargs=confargs)
self.file_test("http_utf8.html", confargs=confargs)
def test_status(self):
for status in sorted(self.handler.responses.keys()):