From 9de237b4c29348db041bfde4d563b68226a7cd1f Mon Sep 17 00:00:00 2001 From: calvin Date: Wed, 21 Mar 2007 19:32:19 +0000 Subject: [PATCH] Check that charset is not None before lowering it in set_encoding(). git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@3547 e7d03fd6-7b0d-0410-9947-9c21f3af8025 --- linkcheck/HtmlParser/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/linkcheck/HtmlParser/__init__.py b/linkcheck/HtmlParser/__init__.py index d97acc25..678ea48c 100644 --- a/linkcheck/HtmlParser/__init__.py +++ b/linkcheck/HtmlParser/__init__.py @@ -208,6 +208,7 @@ def resolve_entities (s): """ return _entity_re.sub(_resolve_entity, s) +SUPPORTED_CHARSETS = ["utf-8", "iso-8859-1", "iso-8859-15"] _encoding_ro = re.compile(r"charset=(?P<encoding>[-0-9a-zA-Z]+)") @@ -223,7 +224,7 @@ def set_encoding (parsobj, attrs): if attrs.get_true('http-equiv', u'').lower() == u"content-type": charset = attrs.get_true('content', u'') charset = get_ctype_charset(charset.encode('ascii', 'ignore')) - if charset is not None: + if charset and charset.lower() in SUPPORTED_CHARSETS: parsobj.encoding = charset