diff --git a/linkcheck/checker/fileurl.py b/linkcheck/checker/fileurl.py index 59820060..d27b6d4a 100644 --- a/linkcheck/checker/fileurl.py +++ b/linkcheck/checker/fileurl.py @@ -122,6 +122,14 @@ class FileUrl(urlbase.UrlBase): ) self.scheme = 'file' + def reset(self): + super().reset() + # the local file URI + self.url_without_anchor = None + # including the anchor in self.url allows the AnchorCheck plugin to be + # used when checking files. The anchor is stripped in UrlBase.set_cache_url() + # if AnchorCheck is not being used. + def build_base_url(self): """The URL is normed according to the platform: - the base URL is made an absolute *file://* URL @@ -171,14 +179,15 @@ class FileUrl(urlbase.UrlBase): urlparts[3] = '' self.base_url = urlutil.urlunsplit(urlparts) super().build_url() - # ignore query and fragment url parts for filesystem urls - self.urlparts[3] = self.urlparts[4] = '' + # ignore query url part for filesystem urls + self.urlparts[3] = '' if self.is_directory() and not self.urlparts[2].endswith('/'): self.add_warning( _("Added trailing slash to directory."), tag=WARN_FILE_MISSING_SLASH ) self.urlparts[2] += '/' self.url = urlutil.urlunsplit(self.urlparts) + self.url_without_anchor = urlutil.urlunsplit(self.urlparts[:4] + ['']) def add_size_info(self): """Get size of file content and modification time from filename path.""" @@ -204,7 +213,7 @@ class FileUrl(urlbase.UrlBase): if self.is_directory(): self.set_result(_("directory")) else: - url = fileutil.path_safe(self.url) + url = fileutil.path_safe(self.url_without_anchor) self.url_connection = urllib.request.urlopen(url) self.check_case_sensitivity() @@ -270,7 +279,7 @@ class FileUrl(urlbase.UrlBase): """ if self.is_directory(): return True - if firefox.has_sqlite and firefox.extension.search(self.url): + if firefox.has_sqlite and firefox.extension.search(self.url_without_anchor): return True return self.is_content_type_parseable() @@ -278,7 +287,8 @@ class FileUrl(urlbase.UrlBase): """Return URL content type, or an empty string if content type could not be found.""" if self.url: - self.content_type = mimeutil.guess_mimetype(self.url, read=self.get_content) + self.content_type = mimeutil.guess_mimetype( + self.url_without_anchor, read=self.get_content) else: self.content_type = "" diff --git a/tests/checker/test_anchor.py b/tests/checker/test_anchor.py index 7848e9e1..186ca05d 100644 --- a/tests/checker/test_anchor.py +++ b/tests/checker/test_anchor.py @@ -27,18 +27,18 @@ class TestAnchor(LinkCheckTest): def test_anchor(self): confargs = {"enabledplugins": ["AnchorCheck"]} - url = "file://%(curdir)s/%(datadir)s/anchor.html" % self.get_attrs() - nurl = self.norm(url) anchor = "broken" - urlanchor = url + "#" + anchor + url = "file://%(curdir)s/%(datadir)s/anchor.html#%(anchor)s" % self.get_attrs( + anchor=anchor) + nurl = self.norm(url) resultlines = [ - "url %s" % urlanchor, + "url %s" % url, "cache key %s" % nurl, "real url %s" % nurl, "warning Anchor `%s' not found. Available anchors: `myid:'." % anchor, "valid", ] - self.direct(urlanchor, resultlines, confargs=confargs) + self.direct(url, resultlines, confargs=confargs) class TestHttpAnchor(HttpServerTest):