mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-23 07:34:44 +00:00
Clarify the decision to fall back to Latin-1
This commit is contained in:
parent
5bd1fb4e36
commit
c60d7c66e4
1 changed file with 3 additions and 1 deletion
|
|
@@ -633,7 +633,9 @@ class UrlBase:
|
|||
self.get_raw_content()
|
||||
self.soup = htmlsoup.make_soup(self.data)
|
||||
# Sometimes soup.original_encoding is None! Better mangled text
|
||||
# than an internal crash, eh?
|
||||
# than an internal crash, eh? ISO-8859-1 is a safe fallback in the
|
||||
# sense that any binary blob can be decoded, it'll never cause a
|
||||
# UnicodeDecodeError.
|
||||
self.encoding = self.soup.original_encoding or 'ISO-8859-1'
|
||||
self.text = self.data.decode(self.encoding)
|
||||
return self.text
|
||||
|
|
|
|||
Loading…
Reference in a new issue