mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-09 15:14:45 +00:00
remove anchor from HEAD and GET requests
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@853 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
db6fbedcef
commit
703abd1ca7
2 changed files with 11 additions and 5 deletions
|
|
@ -45,7 +45,7 @@ class HttpUrlData (ProxyUrlData):
|
|||
if not self.urlparts[2]:
|
||||
self.setWarning(i18n._("URL path is empty, assuming '/' as path"))
|
||||
self.urlparts[2] = '/'
|
||||
self.url = urlparse.urlunsplit(self.urlparts)
|
||||
self.url = urlparse.urlunsplit(self.urlparts[:4]+[self.anchor])
|
||||
|
||||
|
||||
def checkConnection (self):
|
||||
|
|
@ -127,7 +127,11 @@ class HttpUrlData (ProxyUrlData):
|
|||
self.headers.getheader("Uri", ""))
|
||||
redirected = urlparse.urljoin(redirected, newurl)
|
||||
redirected = unquote(redirected)
|
||||
# note: urlparts has to be a list
|
||||
self.urlparts = list(urlparse.urlsplit(redirected))
|
||||
# preserve anchor on redirects
|
||||
self.urlparts[4] = self.anchor
|
||||
# new response data
|
||||
response = self._getHttpResponse()
|
||||
self.headers = response.msg
|
||||
debug(BRING_IT_ON, "Redirected", self.headers)
|
||||
|
|
@ -178,7 +182,7 @@ class HttpUrlData (ProxyUrlData):
|
|||
self.headers = response.msg
|
||||
if response.status not in [301,302]: break
|
||||
|
||||
effectiveurl = urlparse.urlunsplit(self.urlparts)
|
||||
effectiveurl = urlparse.urlunsplit(self.urlparts[:4]+[self.anchor])
|
||||
if self.url != effectiveurl:
|
||||
self.setWarning(i18n._("Effective URL %s") % effectiveurl)
|
||||
self.url = effectiveurl
|
||||
|
|
|
|||
|
|
@ -186,7 +186,9 @@ class UrlData:
|
|||
% str(port))
|
||||
# set host lowercase and without userinfo
|
||||
self.urlparts[1] = host.lower()
|
||||
# save anchor for later checking and delete it from url parts
|
||||
self.anchor = self.urlparts[4]
|
||||
self.urlparts[4] = ''
|
||||
|
||||
|
||||
def logMe (self):
|
||||
|
|
@ -311,11 +313,11 @@ class UrlData:
|
|||
# remember that the host is lowercase
|
||||
if self.urlparts:
|
||||
if self.config["noanchorcaching"]:
|
||||
# remove anchor from cache key
|
||||
return urlparse.urlunsplit(self.urlparts[:4]+[''])
|
||||
# removed anchor from cache key
|
||||
return urlparse.urlunsplit(self.urlparts)
|
||||
else:
|
||||
# do not ignore anchor
|
||||
return urlparse.urlunsplit(self.urlparts)
|
||||
return urlparse.urlunsplit(self.urlparts[:4]+[self.anchor])
|
||||
return None
|
||||
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue