mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-13 19:01:03 +00:00
Fix error due to an empty HTML file accessed over HTTP
Use the already-fixed [1] UrlBase.get_content() in HttpUrl.
[1] 5bd1fb4 ("Fix internal error on empty HTML files", 2020-05-21)
This commit is contained in:
parent
d611564cb0
commit
f7337f55e8
2 changed files with 3 additions and 7 deletions
|
|
@ -299,11 +299,7 @@ class HttpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
||||||
self.set_result(_("OK"))
|
self.set_result(_("OK"))
|
||||||
|
|
||||||
def get_content(self):
|
def get_content(self):
|
||||||
if self.text is None:
|
return super().get_content(self.encoding)
|
||||||
self.get_raw_content()
|
|
||||||
self.soup = htmlsoup.make_soup(self.data, self.encoding)
|
|
||||||
self.text = self.data.decode(self.soup.original_encoding)
|
|
||||||
return self.text
|
|
||||||
|
|
||||||
def read_content(self):
|
def read_content(self):
|
||||||
"""Return data and data size for this URL.
|
"""Return data and data size for this URL.
|
||||||
|
|
|
||||||
|
|
@ -628,10 +628,10 @@ class UrlBase:
|
||||||
self.data = self.download_content()
|
self.data = self.download_content()
|
||||||
return self.data
|
return self.data
|
||||||
|
|
||||||
def get_content(self):
|
def get_content(self, encoding=None):
|
||||||
if self.text is None:
|
if self.text is None:
|
||||||
self.get_raw_content()
|
self.get_raw_content()
|
||||||
self.soup = htmlsoup.make_soup(self.data)
|
self.soup = htmlsoup.make_soup(self.data, encoding)
|
||||||
# Sometimes soup.original_encoding is None! Better mangled text
|
# Sometimes soup.original_encoding is None! Better mangled text
|
||||||
# than an internal crash, eh? ISO-8859-1 is a safe fallback in the
|
# than an internal crash, eh? ISO-8859-1 is a safe fallback in the
|
||||||
# sense that any binary blob can be decoded, it'll never cause a
|
# sense that any binary blob can be decoded, it'll never cause a
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue