mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-17 06:20:27 +00:00
Enable AnchorCheck to be used with local files
I discovered that fileurl.py was stripping the anchors from url_data, which breaks AnchorCheck. So I stopped it from doing that, and tried to fix up all the places that were assuming the URL would map directly to a file on the filesystem. The tests all pass, but I'm not 100% sure that I caught all the cases or fixed them correctly.
This commit is contained in:
parent
a29750c57f
commit
c221afdab5
2 changed files with 20 additions and 10 deletions
|
|
@ -122,6 +122,14 @@ class FileUrl(urlbase.UrlBase):
|
|||
)
|
||||
self.scheme = 'file'
|
||||
|
||||
def reset(self):
|
||||
super().reset()
|
||||
# the local file URI
|
||||
self.url_without_anchor = None
|
||||
# including the anchor in self.url allows the AnchorCheck plugin to be
|
||||
# used when checking files. The anchor is stripped in UrlBase.set_cache_url()
|
||||
# if AnchorCheck is not being used.
|
||||
|
||||
def build_base_url(self):
|
||||
"""The URL is normed according to the platform:
|
||||
- the base URL is made an absolute *file://* URL
|
||||
|
|
@ -171,14 +179,15 @@ class FileUrl(urlbase.UrlBase):
|
|||
urlparts[3] = ''
|
||||
self.base_url = urlutil.urlunsplit(urlparts)
|
||||
super().build_url()
|
||||
# ignore query and fragment url parts for filesystem urls
|
||||
self.urlparts[3] = self.urlparts[4] = ''
|
||||
# ignore query url part for filesystem urls
|
||||
self.urlparts[3] = ''
|
||||
if self.is_directory() and not self.urlparts[2].endswith('/'):
|
||||
self.add_warning(
|
||||
_("Added trailing slash to directory."), tag=WARN_FILE_MISSING_SLASH
|
||||
)
|
||||
self.urlparts[2] += '/'
|
||||
self.url = urlutil.urlunsplit(self.urlparts)
|
||||
self.url_without_anchor = urlutil.urlunsplit(self.urlparts[:4] + [''])
|
||||
|
||||
def add_size_info(self):
|
||||
"""Get size of file content and modification time from filename path."""
|
||||
|
|
@ -204,7 +213,7 @@ class FileUrl(urlbase.UrlBase):
|
|||
if self.is_directory():
|
||||
self.set_result(_("directory"))
|
||||
else:
|
||||
url = fileutil.path_safe(self.url)
|
||||
url = fileutil.path_safe(self.url_without_anchor)
|
||||
self.url_connection = urllib.request.urlopen(url)
|
||||
self.check_case_sensitivity()
|
||||
|
||||
|
|
@ -270,7 +279,7 @@ class FileUrl(urlbase.UrlBase):
|
|||
"""
|
||||
if self.is_directory():
|
||||
return True
|
||||
if firefox.has_sqlite and firefox.extension.search(self.url):
|
||||
if firefox.has_sqlite and firefox.extension.search(self.url_without_anchor):
|
||||
return True
|
||||
return self.is_content_type_parseable()
|
||||
|
||||
|
|
@ -278,7 +287,8 @@ class FileUrl(urlbase.UrlBase):
|
|||
"""Return URL content type, or an empty string if content
|
||||
type could not be found."""
|
||||
if self.url:
|
||||
self.content_type = mimeutil.guess_mimetype(self.url, read=self.get_content)
|
||||
self.content_type = mimeutil.guess_mimetype(
|
||||
self.url_without_anchor, read=self.get_content)
|
||||
else:
|
||||
self.content_type = ""
|
||||
|
||||
|
|
|
|||
|
|
@ -27,18 +27,18 @@ class TestAnchor(LinkCheckTest):
|
|||
|
||||
def test_anchor(self):
|
||||
confargs = {"enabledplugins": ["AnchorCheck"]}
|
||||
url = "file://%(curdir)s/%(datadir)s/anchor.html" % self.get_attrs()
|
||||
nurl = self.norm(url)
|
||||
anchor = "broken"
|
||||
urlanchor = url + "#" + anchor
|
||||
url = "file://%(curdir)s/%(datadir)s/anchor.html#%(anchor)s" % self.get_attrs(
|
||||
anchor=anchor)
|
||||
nurl = self.norm(url)
|
||||
resultlines = [
|
||||
"url %s" % urlanchor,
|
||||
"url %s" % url,
|
||||
"cache key %s" % nurl,
|
||||
"real url %s" % nurl,
|
||||
"warning Anchor `%s' not found. Available anchors: `myid:'." % anchor,
|
||||
"valid",
|
||||
]
|
||||
self.direct(urlanchor, resultlines, confargs=confargs)
|
||||
self.direct(url, resultlines, confargs=confargs)
|
||||
|
||||
|
||||
class TestHttpAnchor(HttpServerTest):
|
||||
|
|
|
|||
Loading…
Reference in a new issue