From b570df925be050fa64b88922b1ff8a2e7364fb7b Mon Sep 17 00:00:00 2001 From: calvin Date: Thu, 6 Feb 2003 01:23:36 +0000 Subject: [PATCH] fix amazon timeouts git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@785 e7d03fd6-7b0d-0410-9947-9c21f3af8025 --- ChangeLog | 5 +++++ linkcheck/HttpUrlData.py | 16 ++++++++++++---- 2 files changed, 17 insertions(+), 4 deletions(-) diff --git a/ChangeLog b/ChangeLog index 70a91fd7..bafb906b 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +1.8.8 + * All amazon servers block HEAD requests with timeouts. Use GET as + a workaround, but issue a warning. + Changed files: linkcheck/HttpUrlData.py + 1.8.7 * #define YY_NO_UNISTD_H on Windows systems, fixes build error with Visual Studio compiler diff --git a/linkcheck/HttpUrlData.py b/linkcheck/HttpUrlData.py index 99365eee..438b4666 100644 --- a/linkcheck/HttpUrlData.py +++ b/linkcheck/HttpUrlData.py @@ -26,6 +26,8 @@ from ProxyUrlData import ProxyUrlData _supported_encodings = ('gzip', 'x-gzip', 'deflate') +_isAmazonHost = re.compile(r'www\.amazon\.(com|de|ca|fr|co\.(uk|jp))').match + class HttpUrlData (ProxyUrlData): "Url link with http scheme" netscape_re = re.compile("Netscape-Enterprise/") @@ -93,6 +95,10 @@ class HttpUrlData (ProxyUrlData): self.setWarning(i18n._("Access denied by robots.txt, checked only syntax")) return + # amazon servers suck + if _isAmazonHost(self.urlparts[1]): + self.setWarning(i18n._("Amazon servers block HTTP HEAD requests, " + "using GET instead")) # first try response = self._getHttpResponse() self.headers = response.msg @@ -156,11 +162,11 @@ class HttpUrlData (ProxyUrlData): self.headers = response.msg elif self.headers: type = self.headers.gettype() - poweredby = self.headers.getheader('X-Powered-By') - server = self.headers.getheader('Server') + poweredby = self.headers.get('X-Powered-By', '') + server = self.headers.get('Server', '') if type=='application/octet-stream' and \ - ((poweredby and poweredby[:4]=='Zope') or \ - (server and server[:4]=='Zope')): + (poweredby.startswith('Zope') or \ + server.startswith('Zope')): self.setWarning(i18n._("Zope Server cannot determine" " MIME type with HEAD, falling back to GET")) response = self._getHttpResponse("GET") @@ -205,6 +211,8 @@ class HttpUrlData (ProxyUrlData): """Put request and return (status code, status text, mime object). host can be host:port format """ + if _isAmazonHost(self.urlparts[1]): + method = "GET" if self.proxy: host = self.proxy else: