From 3d5c114f14a716c94e930e3293881697d9b97356 Mon Sep 17 00:00:00 2001 From: Bastian Kleineidam Date: Sun, 7 Mar 2010 09:36:21 +0100 Subject: [PATCH] Warn on permanent redirections even when URL is outside of domain filter. --- doc/changelog.txt | 5 ++++- linkcheck/checker/httpurl.py | 19 +++++++++++-------- 2 files changed, 15 insertions(+), 9 deletions(-) diff --git a/doc/changelog.txt b/doc/changelog.txt index 78f54c34..e1569ca2 100644 --- a/doc/changelog.txt +++ b/doc/changelog.txt @@ -7,10 +7,13 @@ Fixes: - dns: use /dev/urandom instead of /dev/random to get initial seed on Linux machines since the last one can block indefinitely. Closes: SF bug #2901667 -- checking: Retry if server closed connection and sent an empty +- http: Retry if server closed connection and sent an empty status line. Fixes the "BadStatusLine" errors. - installer: Add commandline executable to Windows installer. Closes: SF bug #2903257 +- http: warn about permanent redirections even when redirected URL is + outside of the domain filter.
+ Closes: SF bug #2920182 5.1 "Let the right one in" (released 04.08.2009) diff --git a/linkcheck/checker/httpurl.py b/linkcheck/checker/httpurl.py index c7502436..f09902bd 100644 --- a/linkcheck/checker/httpurl.py +++ b/linkcheck/checker/httpurl.py @@ -327,6 +327,7 @@ Use URL `%(newurl)s' instead for checking.""") % { self.set_extern(redirected) if self.extern[0] and self.extern[0]: if set_result: + self.check301status(response) self.add_info( _("The redirected URL is outside of the domain " "filter, checked only syntax.")) @@ -359,14 +360,8 @@ Use URL `%(newurl)s' instead for checking.""") % { self.aliases.append(redirected) # note: urlparts has to be a list self.urlparts = urlparts - if response.status == 301: - if not self.has301status: - if set_result: - self.add_warning( - _("HTTP 301 (moved permanent) encountered: you" - " should update this link."), - tag=WARN_HTTP_MOVED_PERMANENT) - self.has301status = True + if set_result: + self.check301status(response) # check cache again on the changed URL if self.aggregate.urlqueue.checked_redirect(redirected, self): return -1, response @@ -392,6 +387,14 @@ Use URL `%(newurl)s' instead for checking.""") % { tries += 1 return tries, response + def check301status (self, response): + """If response page has been permanently moved add a warning.""" + if response.status == 301 and not self.has301status: + self.add_warning(_("HTTP 301 (moved permanent) encountered: you" + " should update this link."), + tag=WARN_HTTP_MOVED_PERMANENT) + self.has301status = True + def get_alias_cache_data (self): """ Return all data values that should be put in the cache,