handle http redirections to non-http url
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@981 e7d03fd6-7b0d-0410-9947-9c21f3af8025
parent 766b01e7a1
commit 6d724d04a4

3 changed files with 33 additions and 18 deletions
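In short, the patch makes the redirect handling scheme-aware: when a 301/302 response points at a non-http url, the checker records a warning and requeues the target as a fresh UrlData object, and the connection factory now picks httplib.HTTPConnection or httplib.HTTPSConnection based on the url scheme. A minimal sketch of that dispatch, assuming the Python 2 httplib API used by the patched code (the standalone helper name below is illustrative, not part of the patch):

    import httplib

    def open_connection(scheme, host):
        # Pick the connection class from the url scheme; anything else is an
        # error, mirroring the new getHTTPObject in HttpUrlData.
        if scheme == "http":
            h = httplib.HTTPConnection(host)
        elif scheme == "https":
            h = httplib.HTTPSConnection(host)
        else:
            raise IOError("invalid url scheme %s" % scheme)
        h.connect()
        return h

    # usage: open_connection("https", "www.example.com").request("HEAD", "/")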
@@ -25,7 +25,8 @@ from debug import *
 if get_debuglevel() > 0:
     robotparser.debug = 1
 from ProxyUrlData import ProxyUrlData
-from UrlData import ExcList
+from UrlData import ExcList, GetUrlDataFrom
+supportHttps = hasattr(httplib, "HTTPSConnection")
 
 ExcList.extend([httplib.error,])
 
@@ -137,8 +138,7 @@ class HttpUrlData (ProxyUrlData):
         while response.status in [301,302] and self.headers and tries < 5:
             newurl = self.headers.getheader("Location",
                          self.headers.getheader("Uri", ""))
-            redirected = urlparse.urljoin(redirected, newurl)
-            redirected = unquote(redirected)
+            redirected = unquote(urlparse.urljoin(redirected, newurl))
             # note: urlparts has to be a list
             self.urlparts = list(urlparse.urlsplit(redirected))
             # check internal redirect cache to avoid recursion
@@ -153,11 +153,11 @@ class HttpUrlData (ProxyUrlData):
             if response.status == 301:
                 if not has301status:
                     self.setWarning(i18n._("HTTP 301 (moved permanent) encountered: you "
-                        "should update this link"))
+                        "should update this link."))
                     if not (self.url.endswith('/') or self.url.endswith('.html')):
                         self.setWarning(i18n._("A HTTP 301 redirection occured and the url has no "
                             "trailing / at the end. All urls which point to (home) "
-                            "directories should end with a / to avoid redirection"))
+                            "directories should end with a / to avoid redirection."))
                     has301status = 1
             self.aliases.append(redirected)
             # check cache again on possibly changed URL
@@ -167,6 +167,22 @@ class HttpUrlData (ProxyUrlData):
                 self.cached = 1
                 self.logMe()
                 return
+            # check if we still have a http url, it could be another
+            # scheme, eg https or news
+            if self.urlparts[0]!="http":
+                self.setWarning(i18n._("HTTP redirection to non-http url encountered; "
+                    "the original url was %s.") % `self.url`)
+                # make new UrlData object
+                newobj = GetUrlDataFrom(redirected, self.recursionLevel, self.config,
+                                        parentName=self.parentName, baseRef=self.baseRef,
+                                        line=self.line, column=self.column, name=self.name)
+                newobj.warningString = self.warningString
+                newobj.infoString = self.infoString
+                # append new object to queue
+                self.config.appendUrl(newobj)
+                # pretend to be finished and logged
+                self.cached = 1
+                return
             # new response data
             response = self._getHttpResponse()
             self.headers = response.msg
@@ -271,7 +287,7 @@ class HttpUrlData (ProxyUrlData):
         debug(HURT_ME_PLENTY, "host", host)
         if self.urlConnection:
             self.closeConnection()
-        self.urlConnection = self._getHTTPObject(host)
+        self.urlConnection = self.getHTTPObject(host)
         # quote parts before submit
         qurlparts = self.urlparts[:]
         qurlparts[2:5] = map(quote, self.urlparts[2:5])
@@ -304,8 +320,14 @@ class HttpUrlData (ProxyUrlData):
         return self.urlConnection.getresponse()


-    def _getHTTPObject (self, host):
-        h = httplib.HTTPConnection(host)
+    def getHTTPObject (self, host):
+        scheme = self.urlparts[0]
+        if scheme=="http":
+            h = httplib.HTTPConnection(host)
+        elif scheme=="https":
+            h = httplib.HTTPSConnection(host)
+        else:
+            raise IOError, "invalid url scheme %s" % scheme
         h.set_debuglevel(get_debuglevel())
         h.connect()
         return h

@@ -18,22 +18,15 @@
 
 import Config, httplib, i18n
 from UrlData import UrlData
-from HttpUrlData import HttpUrlData
+from HttpUrlData import HttpUrlData, supportHttps
 from linkcheck.debug import *
-_supportHttps = hasattr(httplib, "HTTPSConnection")


 class HttpsUrlData (HttpUrlData):
     """Url link with https scheme"""

-    def _getHTTPObject (self, host):
-        h = httplib.HTTPSConnection(host)
-        h.set_debuglevel(get_debuglevel())
-        h.connect()
-        return h
-
     def _check (self):
-        if _supportHttps:
+        if supportHttps:
             HttpUrlData._check(self)
         else:
             self.setWarning(i18n._("%s url ignored")%self.scheme.capitalize())

@@ -19,7 +19,7 @@
 import sys, os, re, time, urlparse, Config, i18n
 from linkcheck import getLinkPat, checkUrls
 from linkcheck.log import strtime
-from UrlData import GetUrlDataFrom
+from linkcheck.UrlData import GetUrlDataFrom
 from types import StringType
 
 _logfile = None