remove anchor from HEAD and GET requests

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@853 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2003-04-28 13:08:00 +00:00
parent db6fbedcef
commit 703abd1ca7
2 changed files with 11 additions and 5 deletions

View file

@ -45,7 +45,7 @@ class HttpUrlData (ProxyUrlData):
if not self.urlparts[2]:
self.setWarning(i18n._("URL path is empty, assuming '/' as path"))
self.urlparts[2] = '/'
self.url = urlparse.urlunsplit(self.urlparts)
self.url = urlparse.urlunsplit(self.urlparts[:4]+[self.anchor])
def checkConnection (self):
@ -127,7 +127,11 @@ class HttpUrlData (ProxyUrlData):
self.headers.getheader("Uri", ""))
redirected = urlparse.urljoin(redirected, newurl)
redirected = unquote(redirected)
# note: urlparts has to be a list
self.urlparts = list(urlparse.urlsplit(redirected))
# preserve anchor on redirects
self.urlparts[4] = self.anchor
# new response data
response = self._getHttpResponse()
self.headers = response.msg
debug(BRING_IT_ON, "Redirected", self.headers)
@ -178,7 +182,7 @@ class HttpUrlData (ProxyUrlData):
self.headers = response.msg
if response.status not in [301,302]: break
effectiveurl = urlparse.urlunsplit(self.urlparts)
effectiveurl = urlparse.urlunsplit(self.urlparts[:4]+[self.anchor])
if self.url != effectiveurl:
self.setWarning(i18n._("Effective URL %s") % effectiveurl)
self.url = effectiveurl

View file

@ -186,7 +186,9 @@ class UrlData:
% str(port))
# set host lowercase and without userinfo
self.urlparts[1] = host.lower()
# save anchor for later checking and delete it from url parts
self.anchor = self.urlparts[4]
self.urlparts[4] = ''
def logMe (self):
@ -311,11 +313,11 @@ class UrlData:
# remember that the host is lowercase
if self.urlparts:
if self.config["noanchorcaching"]:
# remove anchor from cache key
return urlparse.urlunsplit(self.urlparts[:4]+[''])
# anchor is already removed from urlparts, so the cache key omits it
return urlparse.urlunsplit(self.urlparts)
else:
# do not ignore anchor
return urlparse.urlunsplit(self.urlparts)
return urlparse.urlunsplit(self.urlparts[:4]+[self.anchor])
return None