Fix error due to an empty HTML file accessed over HTTP

Use the already fixed [1] UrlBase.get_content() in HttpUrl.

[1] 5bd1fb4 ("Fix internal error on empty HTML files", 2020-05-21)
This commit is contained in:
Chris Mayo 2020-05-23 20:01:24 +01:00
parent d611564cb0
commit f7337f55e8
2 changed files with 3 additions and 7 deletions

View file

@@ -299,11 +299,7 @@ class HttpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
self.set_result(_("OK"))
def get_content(self):
if self.text is None:
self.get_raw_content()
self.soup = htmlsoup.make_soup(self.data, self.encoding)
self.text = self.data.decode(self.soup.original_encoding)
return self.text
return super().get_content(self.encoding)
def read_content(self):
"""Return data and data size for this URL.

View file

@@ -628,10 +628,10 @@ class UrlBase:
self.data = self.download_content()
return self.data
def get_content(self):
def get_content(self, encoding=None):
if self.text is None:
self.get_raw_content()
self.soup = htmlsoup.make_soup(self.data)
self.soup = htmlsoup.make_soup(self.data, encoding)
# Sometimes soup.original_encoding is None! Better mangled text
# than an internal crash, eh? ISO-8859-1 is a safe fallback in the
# sense that any binary blob can be decoded, it'll never cause a