diff --git a/linkcheck/HttpUrlData.py b/linkcheck/HttpUrlData.py index ed30b368..c16a1ef8 100644 --- a/linkcheck/HttpUrlData.py +++ b/linkcheck/HttpUrlData.py @@ -45,7 +45,7 @@ class HttpUrlData (ProxyUrlData): if not self.urlparts[2]: self.setWarning(i18n._("URL path is empty, assuming '/' as path")) self.urlparts[2] = '/' - self.url = urlparse.urlunsplit(self.urlparts) + self.url = urlparse.urlunsplit(self.urlparts[:4]+[self.anchor]) def checkConnection (self): @@ -127,7 +127,11 @@ class HttpUrlData (ProxyUrlData): self.headers.getheader("Uri", "")) redirected = urlparse.urljoin(redirected, newurl) redirected = unquote(redirected) + # note: urlparts has to be a list self.urlparts = list(urlparse.urlsplit(redirected)) + # preserve anchor on redirects + self.urlparts[4] = self.anchor + # new response data response = self._getHttpResponse() self.headers = response.msg debug(BRING_IT_ON, "Redirected", self.headers) @@ -178,7 +182,7 @@ class HttpUrlData (ProxyUrlData): self.headers = response.msg if response.status not in [301,302]: break - effectiveurl = urlparse.urlunsplit(self.urlparts) + effectiveurl = urlparse.urlunsplit(self.urlparts[:4]+[self.anchor]) if self.url != effectiveurl: self.setWarning(i18n._("Effective URL %s") % effectiveurl) self.url = effectiveurl diff --git a/linkcheck/UrlData.py b/linkcheck/UrlData.py index cd1bf6bb..813d2d5d 100644 --- a/linkcheck/UrlData.py +++ b/linkcheck/UrlData.py @@ -186,7 +186,9 @@ class UrlData: % str(port)) # set host lowercase and without userinfo self.urlparts[1] = host.lower() + # save anchor for later checking and delete it from url parts self.anchor = self.urlparts[4] + self.urlparts[4] = '' def logMe (self): @@ -311,11 +313,11 @@ class UrlData: # remember that the host is lowercase if self.urlparts: if self.config["noanchorcaching"]: - # remove anchor from cache key - return urlparse.urlunsplit(self.urlparts[:4]+['']) + # removed anchor from cache key + return urlparse.urlunsplit(self.urlparts) else: # do not 
ignore anchor - return urlparse.urlunsplit(self.urlparts) + return urlparse.urlunsplit(self.urlparts[:4]+[self.anchor]) return None