From b30fb3b09cf1ee270c348ee44de3d9336ba98bb8 Mon Sep 17 00:00:00 2001 From: calvin Date: Mon, 16 Jun 2008 19:52:09 +0000 Subject: [PATCH] Remove duplicate code in http checker. git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@3820 e7d03fd6-7b0d-0410-9947-9c21f3af8025 --- linkcheck/checker/const.py | 5 +++++ linkcheck/checker/httpurl.py | 17 +++++------------ 2 files changed, 10 insertions(+), 12 deletions(-) diff --git a/linkcheck/checker/const.py b/linkcheck/checker/const.py index 28019d93..6f95fd4b 100644 --- a/linkcheck/checker/const.py +++ b/linkcheck/checker/const.py @@ -144,6 +144,11 @@ PARSE_MIMETYPES = ( "application/x-shockwave-flash", ) +HTML_MIMETYPES = ( + "text/html", + "application/xhtml+xml", +) + # if file extension lookup was unsuccessful, look at the content PARSE_CONTENTS = { "html": re.compile(r'^(?i)<(!DOCTYPE html|html|head|title)'), diff --git a/linkcheck/checker/httpurl.py b/linkcheck/checker/httpurl.py index af2631f2..50ad7af0 100644 --- a/linkcheck/checker/httpurl.py +++ b/linkcheck/checker/httpurl.py @@ -36,7 +36,7 @@ from .const import WARN_HTTP_ROBOTS_DENIED, WARN_HTTP_NO_ANCHOR_SUPPORT, \ WARN_HTTP_WRONG_REDIRECT, WARN_HTTP_MOVED_PERMANENT, \ WARN_HTTP_EMPTY_CONTENT, WARN_HTTP_COOKIE_STORE_ERROR, \ WARN_HTTP_DECOMPRESS_ERROR, WARN_HTTP_UNSUPPORTED_ENCODING, \ - PARSE_MIMETYPES + PARSE_MIMETYPES, HTML_MIMETYPES # helper alias unicode_safe = strformat.unicode_safe @@ -606,7 +606,7 @@ Use URL %(newurl)s instead for checking.""") % { """ if not (self.valid and self.headers): return False - if headers.get_content_type(self.headers) not in ("text/html", "application/xhtml+xml"): + if headers.get_content_type(self.headers) not in HTML_MIMETYPES: return False return self.encoding_supported() @@ -638,23 +638,16 @@ Use URL %(newurl)s instead for checking.""") % { return False if headers.get_content_type(self.headers) not in PARSE_MIMETYPES: return False - encoding = headers.get_content_encoding(self.headers) - if encoding and encoding not in _supported_encodings and \ - encoding != 'identity': - self.add_warning(_('Unsupported content encoding %(encoding)r.') % - {"encoding": encoding}, - tag=WARN_HTTP_UNSUPPORTED_ENCODING) - return False - return True + return self.encoding_supported() def parse_url (self): """ Parse file contents for new links to check. """ ctype = headers.get_content_type(self.headers) - if ctype in ("text/html", "application/xhtml+xml"): + if self.is_html(): self.parse_html() - elif ctype == "text/css": + elif self.is_css(): self.parse_css() elif ctype == "application/x-shockwave-flash": self.parse_swf()