mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-20 12:21:53 +00:00
Merge pull request #669 from cjmayo/anchorcheck
Re-enable AnchorCheck plugin
This commit is contained in:
commit
d9265bb71c
14 changed files with 313 additions and 51 deletions
|
|
@ -474,10 +474,6 @@ AnchorCheck
|
|||
|
||||
Checks validity of HTML anchors.
|
||||
|
||||
.. note::
|
||||
|
||||
The AnchorCheck plugin is currently broken and is disabled.
|
||||
|
||||
LocationInfo
|
||||
^^^^^^^^^^^^
|
||||
|
||||
|
|
|
|||
|
|
@ -122,6 +122,14 @@ class FileUrl(urlbase.UrlBase):
|
|||
)
|
||||
self.scheme = 'file'
|
||||
|
||||
def reset(self):
|
||||
super().reset()
|
||||
# the local file URI
|
||||
self.url_without_anchor = None
|
||||
# including the anchor in self.url allows the AnchorCheck plugin to be
|
||||
# used when checking files. The anchor is stripped in UrlBase.set_cache_url()
|
||||
# if AnchorCheck is not being used.
|
||||
|
||||
def build_base_url(self):
|
||||
"""The URL is normed according to the platform:
|
||||
- the base URL is made an absolute *file://* URL
|
||||
|
|
@ -162,23 +170,20 @@ class FileUrl(urlbase.UrlBase):
|
|||
# of the base URL are removed first.
|
||||
# Otherwise the join function thinks the query is part of
|
||||
# the file name.
|
||||
from .urlbase import url_norm
|
||||
|
||||
# norm base url - can raise UnicodeError from url.idna_encode()
|
||||
base_url, is_idn = url_norm(self.base_url, self.encoding)
|
||||
urlparts = list(urllib.parse.urlsplit(base_url))
|
||||
urlparts = list(urllib.parse.urlsplit(self.base_url))
|
||||
# ignore query part for filesystem urls
|
||||
urlparts[3] = ''
|
||||
self.base_url = urlutil.urlunsplit(urlparts)
|
||||
super().build_url()
|
||||
# ignore query and fragment url parts for filesystem urls
|
||||
self.urlparts[3] = self.urlparts[4] = ''
|
||||
# ignore query url part for filesystem urls
|
||||
self.urlparts[3] = ''
|
||||
if self.is_directory() and not self.urlparts[2].endswith('/'):
|
||||
self.add_warning(
|
||||
_("Added trailing slash to directory."), tag=WARN_FILE_MISSING_SLASH
|
||||
)
|
||||
self.urlparts[2] += '/'
|
||||
self.url = urlutil.urlunsplit(self.urlparts)
|
||||
self.url_without_anchor = urlutil.urlunsplit(self.urlparts[:4] + [''])
|
||||
|
||||
def add_size_info(self):
|
||||
"""Get size of file content and modification time from filename path."""
|
||||
|
|
@ -204,7 +209,7 @@ class FileUrl(urlbase.UrlBase):
|
|||
if self.is_directory():
|
||||
self.set_result(_("directory"))
|
||||
else:
|
||||
url = fileutil.path_safe(self.url)
|
||||
url = fileutil.path_safe(self.url_without_anchor)
|
||||
self.url_connection = urllib.request.urlopen(url)
|
||||
self.check_case_sensitivity()
|
||||
|
||||
|
|
@ -270,7 +275,7 @@ class FileUrl(urlbase.UrlBase):
|
|||
"""
|
||||
if self.is_directory():
|
||||
return True
|
||||
if firefox.has_sqlite and firefox.extension.search(self.url):
|
||||
if firefox.has_sqlite and firefox.extension.search(self.url_without_anchor):
|
||||
return True
|
||||
return self.is_content_type_parseable()
|
||||
|
||||
|
|
@ -278,7 +283,8 @@ class FileUrl(urlbase.UrlBase):
|
|||
"""Return URL content type, or an empty string if content
|
||||
type could not be found."""
|
||||
if self.url:
|
||||
self.content_type = mimeutil.guess_mimetype(self.url, read=self.get_content)
|
||||
self.content_type = mimeutil.guess_mimetype(
|
||||
self.url_without_anchor, read=self.get_content)
|
||||
else:
|
||||
self.content_type = ""
|
||||
|
||||
|
|
|
|||
|
|
@ -458,7 +458,7 @@ class UrlBase:
|
|||
self.base_ref = urljoin(self.parent_url, self.base_ref)
|
||||
self.url = urljoin(self.base_ref, base_url)
|
||||
elif self.parent_url:
|
||||
# strip the parent url query and anchor
|
||||
# strip the parent url anchor
|
||||
urlparts = list(urllib.parse.urlsplit(self.parent_url))
|
||||
urlparts[4] = ""
|
||||
parent_url = urlutil.urlunsplit(urlparts)
|
||||
|
|
@ -512,7 +512,7 @@ class UrlBase:
|
|||
urlparts[1] = "%s@%s" % (self.userinfo, host)
|
||||
else:
|
||||
urlparts[1] = host
|
||||
# safe anchor for later checking
|
||||
# save anchor for later checking
|
||||
self.anchor = split.fragment
|
||||
if self.anchor is not None:
|
||||
assert isinstance(self.anchor, str), repr(self.anchor)
|
||||
|
|
|
|||
|
|
@ -26,25 +26,25 @@ from ..htmlutil import linkparse
|
|||
class AnchorCheck(_ContentPlugin):
|
||||
"""Checks validity of HTML anchors."""
|
||||
|
||||
def __init__(self, config):
|
||||
"""Initialize plugin."""
|
||||
super().__init__(config)
|
||||
log.warn(
|
||||
LOG_PLUGIN, _("Anchor check plugin is broken. Fixes welcome.")
|
||||
)
|
||||
|
||||
def applies_to(self, url_data):
|
||||
"""Check for HTML anchor existence."""
|
||||
return False # XXX Plugin disabled
|
||||
return url_data.is_html() and url_data.anchor
|
||||
|
||||
def check(self, url_data):
|
||||
"""Check content for invalid anchors."""
|
||||
log.debug(LOG_PLUGIN, "checking content for invalid anchors")
|
||||
url_anchor_check = UrlAnchorCheck()
|
||||
linkparse.find_links(
|
||||
url_data.get_soup(), url_anchor_check.add_anchor, linkparse.AnchorTags)
|
||||
url_anchor_check.check_anchor(url_data)
|
||||
|
||||
|
||||
class UrlAnchorCheck:
|
||||
"""Class to thread-safely handle collecting anchors for a URL"""
|
||||
|
||||
def __init__(self):
|
||||
# list of parsed anchors
|
||||
self.anchors = []
|
||||
linkparse.find_links(url_data.get_soup(), self.add_anchor, linkparse.AnchorTags)
|
||||
self.check_anchor(url_data)
|
||||
|
||||
def add_anchor(self, url, line, column, name, base):
|
||||
"""Add anchor URL."""
|
||||
|
|
@ -54,17 +54,20 @@ class AnchorCheck(_ContentPlugin):
|
|||
"""If URL is valid, parseable and has an anchor, check it.
|
||||
A warning is logged and True is returned if the anchor is not found.
|
||||
"""
|
||||
log.debug(LOG_PLUGIN, "checking anchor %r in %s", url_data.anchor, self.anchors)
|
||||
if any(x for x in self.anchors if urllib.parse.quote(x[0]) == url_data.anchor):
|
||||
decoded_anchor = urllib.parse.unquote(
|
||||
url_data.anchor, encoding=url_data.encoding)
|
||||
log.debug(LOG_PLUGIN, "checking anchor %r (decoded: %r) in %s",
|
||||
url_data.anchor, decoded_anchor, self.anchors)
|
||||
if any(x for x in self.anchors if x[0] == decoded_anchor):
|
||||
return
|
||||
if self.anchors:
|
||||
anchornames = sorted(set("`%s'" % x[0] for x in self.anchors))
|
||||
anchors = ", ".join(anchornames)
|
||||
else:
|
||||
anchors = "-"
|
||||
args = {"name": url_data.anchor, "anchors": anchors}
|
||||
args = {"name": url_data.anchor, "decoded": decoded_anchor, "anchors": anchors}
|
||||
msg = "%s %s" % (
|
||||
_("Anchor `%(name)s' not found.") % args,
|
||||
_("Anchor `%(name)s' (decoded: `%(decoded)s') not found.") % args,
|
||||
_("Available anchors: %(anchors)s.") % args,
|
||||
)
|
||||
url_data.add_warning(msg)
|
||||
|
|
|
|||
10
tests/checker/data/anchor1.html
Normal file
10
tests/checker/data/anchor1.html
Normal file
|
|
@ -0,0 +1,10 @@
|
|||
<!-- targets -->
|
||||
<a name="oneone">one one</a>
|
||||
<a name="onetwo">one two</a>
|
||||
<a name="onethree">one three</a>
|
||||
<a name="onefour">one four</a>
|
||||
|
||||
<!-- links -->
|
||||
<a href="#oneone">anchor1 one from 1</a>
|
||||
<a href="anchor2.html#twotwo">anchor2 two from 1</a>
|
||||
<a href="anchor2.html#twothree">anchor2 three from 1</a>
|
||||
76
tests/checker/data/anchor1.html.result
Normal file
76
tests/checker/data/anchor1.html.result
Normal file
|
|
@ -0,0 +1,76 @@
|
|||
url #twofour
|
||||
cache key file://%(curdir)s/%(datadir)s/anchor2.html#twofour
|
||||
real url file://%(curdir)s/%(datadir)s/anchor2.html#twofour
|
||||
name anchor2 four from 2
|
||||
valid
|
||||
|
||||
url #threefour
|
||||
cache key file://%(curdir)s/%(datadir)s/anchor3.html#threefour
|
||||
real url file://%(curdir)s/%(datadir)s/anchor3.html#threefour
|
||||
name anchor3 four from 3
|
||||
valid
|
||||
|
||||
url #oneone
|
||||
cache key file://%(curdir)s/%(datadir)s/anchor1.html#oneone
|
||||
real url file://%(curdir)s/%(datadir)s/anchor1.html#oneone
|
||||
name anchor1 one from 1
|
||||
valid
|
||||
|
||||
url #twoone
|
||||
cache key file://%(curdir)s/%(datadir)s/anchor2.html#twoone
|
||||
real url file://%(curdir)s/%(datadir)s/anchor2.html#twoone
|
||||
name anchor2 one from 2
|
||||
valid
|
||||
|
||||
url #threeone
|
||||
cache key file://%(curdir)s/%(datadir)s/anchor3.html#threeone
|
||||
real url file://%(curdir)s/%(datadir)s/anchor3.html#threeone
|
||||
name anchor3 one from 3
|
||||
valid
|
||||
|
||||
url anchor1.html#onefour
|
||||
cache key file://%(curdir)s/%(datadir)s/anchor1.html#onefour
|
||||
real url file://%(curdir)s/%(datadir)s/anchor1.html#onefour
|
||||
name anchor1 four from 3
|
||||
valid
|
||||
|
||||
url anchor1.html#onethree
|
||||
cache key file://%(curdir)s/%(datadir)s/anchor1.html#onethree
|
||||
real url file://%(curdir)s/%(datadir)s/anchor1.html#onethree
|
||||
name anchor1 three from 3
|
||||
valid
|
||||
|
||||
url anchor1.html#onetwo
|
||||
cache key file://%(curdir)s/%(datadir)s/anchor1.html#onetwo
|
||||
real url file://%(curdir)s/%(datadir)s/anchor1.html#onetwo
|
||||
name anchor1 two from 3
|
||||
valid
|
||||
|
||||
url anchor2.html#twothree
|
||||
cache key file://%(curdir)s/%(datadir)s/anchor2.html#twothree
|
||||
real url file://%(curdir)s/%(datadir)s/anchor2.html#twothree
|
||||
name anchor2 three from 1
|
||||
valid
|
||||
|
||||
url anchor2.html#twotwo
|
||||
cache key file://%(curdir)s/%(datadir)s/anchor2.html#twotwo
|
||||
real url file://%(curdir)s/%(datadir)s/anchor2.html#twotwo
|
||||
name anchor2 two from 1
|
||||
valid
|
||||
|
||||
url anchor3.html#threethree
|
||||
cache key file://%(curdir)s/%(datadir)s/anchor3.html#threethree
|
||||
real url file://%(curdir)s/%(datadir)s/anchor3.html#threethree
|
||||
name anchor3 three from 2
|
||||
valid
|
||||
|
||||
url anchor3.html#threetwo
|
||||
cache key file://%(curdir)s/%(datadir)s/anchor3.html#threetwo
|
||||
real url file://%(curdir)s/%(datadir)s/anchor3.html#threetwo
|
||||
name anchor3 two from 2
|
||||
valid
|
||||
|
||||
url file://%(curdir)s/%(datadir)s/anchor1.html
|
||||
cache key file://%(curdir)s/%(datadir)s/anchor1.html
|
||||
real url file://%(curdir)s/%(datadir)s/anchor1.html
|
||||
valid
|
||||
13
tests/checker/data/anchor2.html
Normal file
13
tests/checker/data/anchor2.html
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
<!-- targets -->
|
||||
<a name="twoone">two one</a>
|
||||
<a name="twotwo">two two</a>
|
||||
<a name="twothree">two three</a>
|
||||
<a name="twofour">two four</a>
|
||||
|
||||
<!-- links -->
|
||||
<a href="#twoone">anchor2 one from 2</a>
|
||||
<a href="anchor3.html#threetwo">anchor3 two from 2</a>
|
||||
<a href="anchor3.html#threethree">anchor3 three from 2</a>
|
||||
|
||||
<a href="#twofour">anchor2 four from 2</a>
|
||||
<a href="anchor1.html#oneone">anchor1 one from 2</a>
|
||||
13
tests/checker/data/anchor3.html
Normal file
13
tests/checker/data/anchor3.html
Normal file
|
|
@ -0,0 +1,13 @@
|
|||
<!-- targets -->
|
||||
<a name="threeone">three one</a>
|
||||
<a name="threetwo">three two</a>
|
||||
<a name="threethree">three three</a>
|
||||
<a name="threefour">three four</a>
|
||||
|
||||
<!-- links -->
|
||||
<a href="#threeone">anchor3 one from 3</a>
|
||||
<a href="anchor1.html#onetwo">anchor1 two from 3</a>
|
||||
<a href="anchor1.html#onethree">anchor1 three from 3</a>
|
||||
|
||||
<a href="#threefour">anchor3 four from 3</a>
|
||||
<a href="anchor1.html#onefour">anchor1 four from 3</a>
|
||||
|
|
@ -7,7 +7,7 @@ url http_anchor.html#bad
|
|||
cache key http://localhost:%(port)d/%(datadir)s/http_anchor.html#bad
|
||||
real url http://localhost:%(port)d/%(datadir)s/http_anchor.html#bad
|
||||
name a_bad
|
||||
warning Anchor `bad' not found. Available anchors: `good'.
|
||||
warning Anchor `bad' (decoded: `bad') not found. Available anchors: `good'.
|
||||
valid
|
||||
|
||||
url http_anchor.html#good
|
||||
|
|
|
|||
25
tests/checker/data/urlencoding_anchor.html
Normal file
25
tests/checker/data/urlencoding_anchor.html
Normal file
|
|
@ -0,0 +1,25 @@
|
|||
<!-- targets -->
|
||||
<a name="good">Good target</a>
|
||||
<a name="with_pipes_|_and_parentheses_(and—em-dashes)">Needs-encoded target #1</a>
|
||||
<a name="another_with_pipes_|_and_parentheses_(and—em-dashes)">Needs-encoded target #2</a>
|
||||
<a name="partially_|_(encoded_em—dash)">Target for partially-encoded testing</a>
|
||||
|
||||
<!-- simple anchors -->
|
||||
<a href="urlencoding_anchor.html">No anchor, will not be tested</a>
|
||||
<a href="urlencoding_anchor.html#good">Good anchor</a>
|
||||
<a href="urlencoding_anchor.html#bad">Bad anchor</a>
|
||||
|
||||
<!-- different encodings of the same complex anchor -->
|
||||
<a href="urlencoding_anchor.html#with_pipes_|_and_parentheses_(and—em-dashes)">Non-encoded anchor</a>
|
||||
<a href="urlencoding_anchor.html#with_pipes_|_and_parentheses_%28and%E2%80%94em-dashes%29">Partially-encoded version of the same anchor</a>
|
||||
<a href="urlencoding_anchor.html#with_pipes_%7C_and_parentheses_%28and%E2%80%94em-dashes%29">Fully-encoded version of the same anchor</a>
|
||||
|
||||
<!-- just another complex anchor -->
|
||||
<a href="urlencoding_anchor.html#another_with_pipes_%7C_and_parentheses_%28and%E2%80%94em-dashes%29">Distinct encoded anchor</a>
|
||||
|
||||
<!-- bad anchor, encoded two ways -->
|
||||
<a href="urlencoding_anchor.html#bad_|_%28and%E2%80%94em-dashes%29">Bad encoded anchor, partially encoded</a>
|
||||
<a href="urlencoding_anchor.html#bad_%7C_%28and%E2%80%94em-dashes%29">Bad encoded anchor, fully encoded</a>
|
||||
|
||||
<!-- stand-alone test for partially-encoded anchor -->
|
||||
<a href="urlencoding_anchor.html#partially_%7C_(encoded_em—dash)">Partially-encoded anchor, for testing</a>
|
||||
42
tests/checker/data/urlencoding_anchor.html.file.result
Normal file
42
tests/checker/data/urlencoding_anchor.html.file.result
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
url file://%(curdir)s/%(datadir)s/urlencoding_anchor.html
|
||||
cache key file://%(curdir)s/%(datadir)s/urlencoding_anchor.html
|
||||
real url file://%(curdir)s/%(datadir)s/urlencoding_anchor.html
|
||||
valid
|
||||
|
||||
url urlencoding_anchor.html#another_with_pipes_%%7C_and_parentheses_%%28and%%E2%%80%%94em-dashes%%29
|
||||
cache key file://%(curdir)s/%(datadir)s/urlencoding_anchor.html#another_with_pipes_%%7C_and_parentheses_(and%%E2%%80%%94em-dashes)
|
||||
real url file://%(curdir)s/%(datadir)s/urlencoding_anchor.html#another_with_pipes_%%7C_and_parentheses_(and%%E2%%80%%94em-dashes)
|
||||
name Distinct encoded anchor
|
||||
valid
|
||||
|
||||
url urlencoding_anchor.html#bad
|
||||
cache key file://%(curdir)s/%(datadir)s/urlencoding_anchor.html#bad
|
||||
real url file://%(curdir)s/%(datadir)s/urlencoding_anchor.html#bad
|
||||
name Bad anchor
|
||||
warning Anchor `bad' (decoded: `bad') not found. Available anchors: `another_with_pipes_|_and_parentheses_(and—em-dashes)', `good', `partially_|_(encoded_em—dash)', `with_pipes_|_and_parentheses_(and—em-dashes)'.
|
||||
valid
|
||||
|
||||
url urlencoding_anchor.html#bad_|_%%28and%%E2%%80%%94em-dashes%%29
|
||||
cache key file://%(curdir)s/%(datadir)s/urlencoding_anchor.html#bad_%%7C_(and%%E2%%80%%94em-dashes)
|
||||
real url file://%(curdir)s/%(datadir)s/urlencoding_anchor.html#bad_%%7C_(and%%E2%%80%%94em-dashes)
|
||||
name Bad encoded anchor, partially encoded
|
||||
warning Anchor `bad_%%7C_(and%%E2%%80%%94em-dashes)' (decoded: `bad_|_(and—em-dashes)') not found. Available anchors: `another_with_pipes_|_and_parentheses_(and—em-dashes)', `good', `partially_|_(encoded_em—dash)', `with_pipes_|_and_parentheses_(and—em-dashes)'.
|
||||
valid
|
||||
|
||||
url urlencoding_anchor.html#good
|
||||
cache key file://%(curdir)s/%(datadir)s/urlencoding_anchor.html#good
|
||||
real url file://%(curdir)s/%(datadir)s/urlencoding_anchor.html#good
|
||||
name Good anchor
|
||||
valid
|
||||
|
||||
url urlencoding_anchor.html#partially_%%7C_(encoded_em—dash)
|
||||
cache key file://%(curdir)s/%(datadir)s/urlencoding_anchor.html#partially_%%7C_(encoded_em%%E2%%80%%94dash)
|
||||
real url file://%(curdir)s/%(datadir)s/urlencoding_anchor.html#partially_%%7C_(encoded_em%%E2%%80%%94dash)
|
||||
name Partially-encoded anchor, for testing
|
||||
valid
|
||||
|
||||
url urlencoding_anchor.html#with_pipes_|_and_parentheses_(and—em-dashes)
|
||||
cache key file://%(curdir)s/%(datadir)s/urlencoding_anchor.html#with_pipes_%%7C_and_parentheses_(and%%E2%%80%%94em-dashes)
|
||||
real url file://%(curdir)s/%(datadir)s/urlencoding_anchor.html#with_pipes_%%7C_and_parentheses_(and%%E2%%80%%94em-dashes)
|
||||
name Non-encoded anchor
|
||||
valid
|
||||
42
tests/checker/data/urlencoding_anchor.html.result
Normal file
42
tests/checker/data/urlencoding_anchor.html.result
Normal file
|
|
@ -0,0 +1,42 @@
|
|||
url http://localhost:%(port)d/%(datadir)s/urlencoding_anchor.html
|
||||
cache key http://localhost:%(port)d/%(datadir)s/urlencoding_anchor.html
|
||||
real url http://localhost:%(port)d/%(datadir)s/urlencoding_anchor.html
|
||||
valid
|
||||
|
||||
url urlencoding_anchor.html#another_with_pipes_%%7C_and_parentheses_%%28and%%E2%%80%%94em-dashes%%29
|
||||
cache key http://localhost:%(port)d/%(datadir)s/urlencoding_anchor.html#another_with_pipes_%%7C_and_parentheses_(and%%E2%%80%%94em-dashes)
|
||||
real url http://localhost:%(port)d/%(datadir)s/urlencoding_anchor.html#another_with_pipes_%%7C_and_parentheses_(and%%E2%%80%%94em-dashes)
|
||||
name Distinct encoded anchor
|
||||
valid
|
||||
|
||||
url urlencoding_anchor.html#bad
|
||||
cache key http://localhost:%(port)d/%(datadir)s/urlencoding_anchor.html#bad
|
||||
real url http://localhost:%(port)d/%(datadir)s/urlencoding_anchor.html#bad
|
||||
name Bad anchor
|
||||
warning Anchor `bad' (decoded: `bad') not found. Available anchors: `another_with_pipes_|_and_parentheses_(and—em-dashes)', `good', `partially_|_(encoded_em—dash)', `with_pipes_|_and_parentheses_(and—em-dashes)'.
|
||||
valid
|
||||
|
||||
url urlencoding_anchor.html#bad_|_%%28and%%E2%%80%%94em-dashes%%29
|
||||
cache key http://localhost:%(port)d/%(datadir)s/urlencoding_anchor.html#bad_%%7C_(and%%E2%%80%%94em-dashes)
|
||||
real url http://localhost:%(port)d/%(datadir)s/urlencoding_anchor.html#bad_%%7C_(and%%E2%%80%%94em-dashes)
|
||||
name Bad encoded anchor, partially encoded
|
||||
warning Anchor `bad_%%7C_(and%%E2%%80%%94em-dashes)' (decoded: `bad_|_(and—em-dashes)') not found. Available anchors: `another_with_pipes_|_and_parentheses_(and—em-dashes)', `good', `partially_|_(encoded_em—dash)', `with_pipes_|_and_parentheses_(and—em-dashes)'.
|
||||
valid
|
||||
|
||||
url urlencoding_anchor.html#good
|
||||
cache key http://localhost:%(port)d/%(datadir)s/urlencoding_anchor.html#good
|
||||
real url http://localhost:%(port)d/%(datadir)s/urlencoding_anchor.html#good
|
||||
name Good anchor
|
||||
valid
|
||||
|
||||
url urlencoding_anchor.html#partially_%%7C_(encoded_em—dash)
|
||||
cache key http://localhost:%(port)d/%(datadir)s/urlencoding_anchor.html#partially_%%7C_(encoded_em%%E2%%80%%94dash)
|
||||
real url http://localhost:%(port)d/%(datadir)s/urlencoding_anchor.html#partially_%%7C_(encoded_em%%E2%%80%%94dash)
|
||||
name Partially-encoded anchor, for testing
|
||||
valid
|
||||
|
||||
url urlencoding_anchor.html#with_pipes_|_and_parentheses_(and—em-dashes)
|
||||
cache key http://localhost:%(port)d/%(datadir)s/urlencoding_anchor.html#with_pipes_%%7C_and_parentheses_(and%%E2%%80%%94em-dashes)
|
||||
real url http://localhost:%(port)d/%(datadir)s/urlencoding_anchor.html#with_pipes_%%7C_and_parentheses_(and%%E2%%80%%94em-dashes)
|
||||
name Non-encoded anchor
|
||||
valid
|
||||
|
|
@ -161,9 +161,10 @@ class HttpServerTest(LinkCheckTest):
|
|||
self.port = None
|
||||
self.handler = NoQueryHttpRequestHandler
|
||||
|
||||
def setUp(self):
|
||||
def setUp(self, https=False):
|
||||
"""Start a new HTTP server in a new thread."""
|
||||
self.port = start_server(self.handler)
|
||||
super().setUp()
|
||||
self.port = start_server(self.handler, https)
|
||||
assert self.port is not None
|
||||
|
||||
def tearDown(self):
|
||||
|
|
@ -182,8 +183,7 @@ class HttpsServerTest(HttpServerTest):
|
|||
|
||||
def setUp(self):
|
||||
"""Start a new HTTPS server in a new thread."""
|
||||
self.port = start_server(self.handler, https=True)
|
||||
assert self.port is not None
|
||||
super().setUp(https=True)
|
||||
|
||||
def tearDown(self):
|
||||
"""Send QUIT request to http server."""
|
||||
|
|
|
|||
|
|
@ -16,32 +16,30 @@
|
|||
"""
|
||||
Test html anchor parsing and checking.
|
||||
"""
|
||||
import pytest
|
||||
|
||||
from . import LinkCheckTest
|
||||
from .httpserver import HttpServerTest
|
||||
|
||||
|
||||
class TestAnchor(LinkCheckTest):
|
||||
class TestFileAnchor(LinkCheckTest):
|
||||
"""
|
||||
Test anchor checking of HTML pages.
|
||||
Test anchor checking of HTML files.
|
||||
"""
|
||||
|
||||
@pytest.mark.xfail(strict=True)
|
||||
def test_anchor(self):
|
||||
def test_anchor_file(self):
|
||||
confargs = {"enabledplugins": ["AnchorCheck"]}
|
||||
url = "file://%(curdir)s/%(datadir)s/anchor.html" % self.get_attrs()
|
||||
nurl = self.norm(url)
|
||||
anchor = "broken"
|
||||
urlanchor = url + "#" + anchor
|
||||
url = "file://%(curdir)s/%(datadir)s/anchor.html#%(anchor)s" % self.get_attrs(
|
||||
anchor=anchor)
|
||||
nurl = self.norm(url)
|
||||
resultlines = [
|
||||
"url %s" % urlanchor,
|
||||
"cache key %s" % nurl,
|
||||
"real url %s" % nurl,
|
||||
"warning Anchor `%s' not found. Available anchors: `myid:'." % anchor,
|
||||
f"url {url}",
|
||||
f"cache key {nurl}",
|
||||
f"real url {nurl}",
|
||||
f"warning Anchor `{anchor}' (decoded: `{anchor}') not found."
|
||||
" Available anchors: `myid:'.",
|
||||
"valid",
|
||||
]
|
||||
self.direct(urlanchor, resultlines, confargs=confargs)
|
||||
self.direct(url, resultlines, confargs=confargs)
|
||||
|
||||
|
||||
class TestHttpAnchor(HttpServerTest):
|
||||
|
|
@ -49,7 +47,45 @@ class TestHttpAnchor(HttpServerTest):
|
|||
Test checking of HTML pages containing links to anchors served over http.
|
||||
"""
|
||||
|
||||
@pytest.mark.xfail(strict=True)
|
||||
def test_anchor_html(self):
|
||||
def test_anchor_http(self):
|
||||
confargs = dict(enabledplugins=["AnchorCheck"], recursionlevel=1)
|
||||
self.file_test("http_anchor.html", confargs=confargs)
|
||||
|
||||
|
||||
class TestEncodedAnchors(HttpServerTest):
|
||||
"""Test HTML pages containing urlencoded links to anchors"""
|
||||
|
||||
def test_anchor_encoded_http(self):
|
||||
"""
|
||||
http://
|
||||
"""
|
||||
confargs = dict(enabledplugins=["AnchorCheck"], recursionlevel=1)
|
||||
self.file_test("urlencoding_anchor.html", confargs=confargs)
|
||||
|
||||
def test_anchor_encoded_file(self):
|
||||
"""
|
||||
file://
|
||||
This should behave identically to http://
|
||||
"""
|
||||
filename = "urlencoding_anchor.html"
|
||||
confargs = {"enabledplugins": ["AnchorCheck"]}
|
||||
url = "file://%(curdir)s/%(datadir)s/%(filename)s" % self.get_attrs(
|
||||
filename=filename)
|
||||
# get results from the special result file that has `.file.` in its name
|
||||
resultlines = self.get_resultlines(f"{filename}.file")
|
||||
self.direct(url, resultlines, recursionlevel=1, confargs=confargs)
|
||||
|
||||
|
||||
class TestAnchorsAcrossMultipleFiles(LinkCheckTest):
|
||||
"""Test anchors when there are multiple files"""
|
||||
|
||||
def test_anchor1_file(self):
|
||||
"""
|
||||
Test a network of files that reference each other, starting with anchor1.html
|
||||
"""
|
||||
filename = "anchor1.html"
|
||||
confargs = {"enabledplugins": ["AnchorCheck"]}
|
||||
url = "file://%(curdir)s/%(datadir)s/%(filename)s" % self.get_attrs(
|
||||
filename=filename)
|
||||
resultlines = self.get_resultlines(filename)
|
||||
self.direct(url, resultlines, recursionlevel=4, confargs=confargs)
|
||||
|
|
|
|||
Loading…
Reference in a new issue