diff --git a/linkcheck/checker/httpurl.py b/linkcheck/checker/httpurl.py index e0c79254..a0ed556f 100644 --- a/linkcheck/checker/httpurl.py +++ b/linkcheck/checker/httpurl.py @@ -299,11 +299,7 @@ class HttpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport): self.set_result(_("OK")) def get_content(self): - if self.text is None: - self.get_raw_content() - self.soup = htmlsoup.make_soup(self.data, self.encoding) - self.text = self.data.decode(self.soup.original_encoding) - return self.text + return super().get_content(self.encoding) def read_content(self): """Return data and data size for this URL. diff --git a/linkcheck/checker/urlbase.py b/linkcheck/checker/urlbase.py index 1b288ba5..cccb4f38 100644 --- a/linkcheck/checker/urlbase.py +++ b/linkcheck/checker/urlbase.py @@ -628,10 +628,10 @@ class UrlBase: self.data = self.download_content() return self.data - def get_content(self): + def get_content(self, encoding=None): if self.text is None: self.get_raw_content() - self.soup = htmlsoup.make_soup(self.data) + self.soup = htmlsoup.make_soup(self.data, encoding) # Sometimes soup.original_encoding is None! Better mangled text # than an internal crash, eh? ISO-8859-1 is a safe fallback in the # sense that any binary blob can be decoded, it'll never cause a