Enable AnchorCheck to be used with local files

[I] discovered that fileurl.py was stripping the anchors from url_data,
which breaks AnchorCheck. So I stopped it from doing that, and
tried to fix up all the places that were assuming the url would map to a
filesystem file. The tests all pass, but I'm not 100% sure I caught all
the cases, or fixed them correctly.
This commit is contained in:
Nathan Arthur 2022-10-03 19:33:05 +01:00 committed by Chris Mayo
parent a29750c57f
commit c221afdab5
2 changed files with 20 additions and 10 deletions

View file

@ -122,6 +122,14 @@ class FileUrl(urlbase.UrlBase):
)
self.scheme = 'file'
# Reset state through the parent class, then clear the anchor-free form of
# the file URL that this class tracks in addition to self.url.
def reset(self):
super().reset()
# the local file URI without its anchor (fragment); stays None until it
# is assigned from the URL parts when the URL is built
self.url_without_anchor = None
# self.url keeps the anchor so that the AnchorCheck plugin can be used when
# checking files; self.url_without_anchor is used where the URL is opened or
# inspected as a local file. If AnchorCheck is not being used, the anchor is
# stripped in UrlBase.set_cache_url().
def build_base_url(self):
"""The URL is normed according to the platform:
- the base URL is made an absolute *file://* URL
@ -171,14 +179,15 @@ class FileUrl(urlbase.UrlBase):
urlparts[3] = ''
self.base_url = urlutil.urlunsplit(urlparts)
super().build_url()
# ignore query and fragment url parts for filesystem urls
self.urlparts[3] = self.urlparts[4] = ''
# ignore query url part for filesystem urls
self.urlparts[3] = ''
if self.is_directory() and not self.urlparts[2].endswith('/'):
self.add_warning(
_("Added trailing slash to directory."), tag=WARN_FILE_MISSING_SLASH
)
self.urlparts[2] += '/'
self.url = urlutil.urlunsplit(self.urlparts)
self.url_without_anchor = urlutil.urlunsplit(self.urlparts[:4] + [''])
def add_size_info(self):
"""Get size of file content and modification time from filename path."""
@ -204,7 +213,7 @@ class FileUrl(urlbase.UrlBase):
if self.is_directory():
self.set_result(_("directory"))
else:
url = fileutil.path_safe(self.url)
url = fileutil.path_safe(self.url_without_anchor)
self.url_connection = urllib.request.urlopen(url)
self.check_case_sensitivity()
@ -270,7 +279,7 @@ class FileUrl(urlbase.UrlBase):
"""
if self.is_directory():
return True
if firefox.has_sqlite and firefox.extension.search(self.url):
if firefox.has_sqlite and firefox.extension.search(self.url_without_anchor):
return True
return self.is_content_type_parseable()
@ -278,7 +287,8 @@ class FileUrl(urlbase.UrlBase):
"""Return URL content type, or an empty string if content
type could not be found."""
if self.url:
self.content_type = mimeutil.guess_mimetype(self.url, read=self.get_content)
self.content_type = mimeutil.guess_mimetype(
self.url_without_anchor, read=self.get_content)
else:
self.content_type = ""

View file

@ -27,18 +27,18 @@ class TestAnchor(LinkCheckTest):
def test_anchor(self):
confargs = {"enabledplugins": ["AnchorCheck"]}
url = "file://%(curdir)s/%(datadir)s/anchor.html" % self.get_attrs()
nurl = self.norm(url)
anchor = "broken"
urlanchor = url + "#" + anchor
url = "file://%(curdir)s/%(datadir)s/anchor.html#%(anchor)s" % self.get_attrs(
anchor=anchor)
nurl = self.norm(url)
resultlines = [
"url %s" % urlanchor,
"url %s" % url,
"cache key %s" % nurl,
"real url %s" % nurl,
"warning Anchor `%s' not found. Available anchors: `myid:'." % anchor,
"valid",
]
self.direct(urlanchor, resultlines, confargs=confargs)
self.direct(url, resultlines, confargs=confargs)
class TestHttpAnchor(HttpServerTest):