mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-15 20:01:03 +00:00
Requests falls back to ISO-8859-1 for text content types that declare no
charset, which causes us to mangle UTF-8 content.
Requests' utils.py:
    def get_encoding_from_headers(headers):
        """Returns encodings from given HTTP Header Dict.
        :param headers: dictionary to extract encoding from.
        :rtype: str
        """
        content_type = headers.get('content-type')
        if not content_type:
            return None
        content_type, params = _parse_content_type_header(content_type)
        if 'charset' in params:
            return params['charset'].strip("'\"")
        if 'text' in content_type:
            return 'ISO-8859-1'
        if 'application/json' in content_type:
            # Assume UTF-8 based on RFC 4627: https://www.ietf.org/rfc/rfc4627.txt since the charset was unset
            return 'utf-8'
| | | |
|---|---|---|
| .. | ||
| __init__.py | ||
| const.py | ||
| dnsurl.py | ||
| fileurl.py | ||
| ftpurl.py | ||
| httpurl.py | ||
| ignoreurl.py | ||
| internpaturl.py | ||
| itmsservicesurl.py | ||
| mailtourl.py | ||
| nntpurl.py | ||
| proxysupport.py | ||
| telneturl.py | ||
| unknownurl.py | ||
| urlbase.py | ||