mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-21 08:20:25 +00:00
Fix error due to an empty HTML file accessed over HTTP
Use the already fixed [1] UrlBase.get_content() in HttpUrl.
[1] 5bd1fb4 ("Fix internal error on empty HTML files", 2020-05-21)
This commit is contained in:
parent
d611564cb0
commit
f7337f55e8
2 changed files with 3 additions and 7 deletions
|
|
@@ -299,11 +299,7 @@ class HttpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
self.set_result(_("OK"))
|
||||
|
||||
def get_content(self):
|
||||
if self.text is None:
|
||||
self.get_raw_content()
|
||||
self.soup = htmlsoup.make_soup(self.data, self.encoding)
|
||||
self.text = self.data.decode(self.soup.original_encoding)
|
||||
return self.text
|
||||
return super().get_content(self.encoding)
|
||||
|
||||
def read_content(self):
|
||||
"""Return data and data size for this URL.
|
||||
|
|
|
|||
|
|
@@ -628,10 +628,10 @@ class UrlBase:
|
|||
self.data = self.download_content()
|
||||
return self.data
|
||||
|
||||
def get_content(self):
|
||||
def get_content(self, encoding=None):
|
||||
if self.text is None:
|
||||
self.get_raw_content()
|
||||
self.soup = htmlsoup.make_soup(self.data)
|
||||
self.soup = htmlsoup.make_soup(self.data, encoding)
|
||||
# Sometimes soup.original_encoding is None! Better mangled text
|
||||
# than an internal crash, eh? ISO-8859-1 is a safe fallback in the
|
||||
# sense that any binary blob can be decoded, it'll never cause a
|
||||
|
|
|
|||
Loading…
Reference in a new issue