mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-27 01:24:42 +00:00
enable meta refresh url parsing again
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@489 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
35b807854b
commit
53264c1fc9
2 changed files with 8 additions and 4 deletions
1
debian/linkchecker.prerm
vendored
1
debian/linkchecker.prerm
vendored
|
|
@ -13,7 +13,6 @@ dpkg --listfiles $PACKAGE |
|
|||
xargs rm -f >&2
|
||||
|
||||
rmdir /usr/lib/$PYTHON/site-packages/linkcheck 2>/dev/null || true
|
||||
rmdir /usr/lib/$PYTHON/site-packages/DNS 2>/dev/null || true
|
||||
|
||||
# for later use of python-central
|
||||
#/usr/sbin/register-python-package module remove linkchecker ">=2.0"
|
||||
|
|
|
|||
|
|
@ -90,9 +90,6 @@ _linkMatcher = r"""
|
|||
"""
|
||||
|
||||
|
||||
# disable meta tag for now, the modified linkmatcher does not allow it
|
||||
# (['meta'], ['url']), # <meta http-equiv='refresh' content='x; url=...'>
|
||||
|
||||
# ripped mainly from HTML::Tagset.pm
|
||||
LinkTags = (
|
||||
(['a'], ['href']),
|
||||
|
|
@ -118,8 +115,12 @@ LinkTags = (
|
|||
(['script'], ['src', 'for']),
|
||||
(['body', 'table', 'td', 'th', 'tr'], ['background']),
|
||||
(['xmp'], ['href']),
|
||||
(['meta'], ['content']),
|
||||
)
|
||||
|
||||
# matcher for <meta http-equiv=refresh> tags
|
||||
_refresh_re = re.compile(r"(?i)^\d+;\s*url=(?P<url>.+)$")
|
||||
|
||||
LinkPatterns = []
|
||||
for _tags,_attrs in LinkTags:
|
||||
_tag = '(%s)'%'|'.join(_tags)
|
||||
|
|
@ -499,6 +500,10 @@ class UrlData:
|
|||
if self.is_in_comment(match.start()): continue
|
||||
# strip quotes
|
||||
url = StringUtil.stripQuotes(match.group('value'))
|
||||
if 'meta' in pattern['tags']:
|
||||
match = _refresh_re.match(url)
|
||||
if match:
|
||||
url = match.group("url")
|
||||
# need to resolve HTML entities
|
||||
url = StringUtil.unhtmlify(url)
|
||||
lineno= StringUtil.getLineNumber(self.getContent(), match.start())
|
||||
|
|
|
|||
Loading…
Reference in a new issue