From 6fac69cddb7e04c6b2425c19fbbfe4121b2db030 Mon Sep 17 00:00:00 2001 From: Bastian Kleineidam Date: Sun, 21 Nov 2010 19:50:51 +0100 Subject: [PATCH] Fall back to GET when connection is reset. --- doc/changelog.txt | 3 +++ linkcheck/checker/httpurl.py | 13 +++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) diff --git a/doc/changelog.txt b/doc/changelog.txt index f4ac9c28..96488a88 100644 --- a/doc/changelog.txt +++ b/doc/changelog.txt @@ -1,6 +1,9 @@ 6.0 "" (released xx.xx.2010) Fixes: +- checking: Fall back to HTTP GET requests when the connection has + been reset since some servers tend to do this for HEAD requests. + Closes: SF bug #3114622 Changes: - dependencies: Require and use Python 2.6. diff --git a/linkcheck/checker/httpurl.py b/linkcheck/checker/httpurl.py index 6f96846f..4c0c604d 100644 --- a/linkcheck/checker/httpurl.py +++ b/linkcheck/checker/httpurl.py @@ -21,6 +21,7 @@ Handle http links. import urlparse import urllib import re +import errno import zlib import socket from cStringIO import StringIO @@ -40,8 +41,7 @@ from .const import WARN_HTTP_ROBOTS_DENIED, \ # helper alias unicode_safe = strformat.unicode_safe -supportHttps = hasattr(httplib, "HTTPSConnection") and \ - hasattr(socket, "ssl") +supportHttps = hasattr(httplib, "HTTPSConnection") _supported_encodings = ('gzip', 'x-gzip', 'deflate') @@ -185,6 +185,15 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport): self.fallback_get = True continue raise + except socket.error, msg: + # some servers reset the connection on HEAD requests + if self.method == "HEAD" and self.method_get_allowed and \ + msg[0] == errno.ECONNRESET: + self.method = "GET" + self.aliases = [] + self.fallback_get = True + continue + raise if response.reason: response.reason = unicode_safe(response.reason) log.debug(LOG_CHECK,