mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-28 18:14:42 +00:00
fix testing scripts
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@400 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
dbc724e644
commit
3bb5d21955
6 changed files with 17 additions and 41 deletions
|
|
@ -77,20 +77,23 @@ _linkMatcher = r"""
|
|||
\s* # whitespace
|
||||
%s # tag name
|
||||
\s+ # whitespace
|
||||
([^"'>]|"[^"]"|'[^']')*? # skip leading attributes
|
||||
([^"'>]|"[^"]*"|'[^']*')*? # skip leading attributes
|
||||
%s # attrib name
|
||||
\s* # whitespace
|
||||
= # equal sign
|
||||
\s* # whitespace
|
||||
(?P<value> # attribute value
|
||||
"[^"]*" | # in double quotes
|
||||
'[^']*' | # in single quotes
|
||||
"[^"]*" | # in double quotes
|
||||
'[^']*' | # in single quotes
|
||||
[^\s>]+) # unquoted
|
||||
([^"'>]|"[^"]"|'[^']')* # skip trailing attributes
|
||||
([^"'>]|"[^"]*"|'[^']*')* # skip trailing attributes
|
||||
> # close tag
|
||||
"""
|
||||
|
||||
|
||||
# disable meta tag for now, the modified linkmatcher does not allow it
|
||||
# (['meta'], ['url']), # <meta http-equiv='refresh' content='x; url=...'>
|
||||
|
||||
# ripped mainly from HTML::Tagset.pm
|
||||
LinkTags = (
|
||||
(['a'], ['href']),
|
||||
|
|
@ -111,11 +114,10 @@ LinkTags = (
|
|||
(['isindex'], ['action']),
|
||||
(['layer'], ['background', 'src']),
|
||||
(['link'], ['href']),
|
||||
(['meta'], ['url']), # <meta http-equiv='refresh' content='x; url=...'>
|
||||
(['object'], ['classid', 'codebase', 'data', 'archive', 'usemap']),
|
||||
(['q'], ['cite']),
|
||||
(['script'], ['src', 'for']),
|
||||
(['body', 'table', 'td', 'th', 'tr'], ['background']),
|
||||
(['body', 'table', 'td', 'th', 'tr'], ['background']),
|
||||
(['xmp'], ['href']),
|
||||
)
|
||||
|
||||
|
|
@ -147,9 +149,9 @@ CommentPatternEnd = re.compile("--\s*>")
|
|||
class UrlData:
|
||||
"Representing a URL with additional information like validity etc"
|
||||
|
||||
def __init__(self,
|
||||
urlName,
|
||||
recursionLevel,
|
||||
def __init__(self,
|
||||
urlName,
|
||||
recursionLevel,
|
||||
parentName = None,
|
||||
baseRef = None,
|
||||
line = 0,
|
||||
|
|
@ -453,8 +455,8 @@ class UrlData:
|
|||
|
||||
|
||||
def searchInForTag(self, pattern):
|
||||
debug(HURT_ME_PLENTY, "Searching for tag", pattern['tag'],
|
||||
"attribute", pattern['attr'])
|
||||
debug(HURT_ME_PLENTY, "Searching for tag", `pattern['tag']`,
|
||||
"attribute", `pattern['attr']`)
|
||||
urls = []
|
||||
index = 0
|
||||
while 1:
|
||||
|
|
@ -462,14 +464,15 @@ class UrlData:
|
|||
if not match: break
|
||||
index = match.end()
|
||||
if self.is_in_comment(match.start()): continue
|
||||
# need to strip optional ending quotes for the meta tag
|
||||
url = StringUtil.stripQuotes(match.group('value')).strip()
|
||||
# strip quotes
|
||||
url = StringUtil.stripQuotes(match.group('value'))
|
||||
# need to resolve HTML entities
|
||||
url = StringUtil.unhtmlify(url)
|
||||
lineno= StringUtil.getLineNumber(self.getContent(), match.start())
|
||||
# extra feature: get optional name for this bookmark
|
||||
name = self.searchInForName(pattern['tag'], pattern['attr'],
|
||||
match.start(), match.end())
|
||||
debug(HURT_ME_PLENTY, "Found", `url`, "at line", lineno)
|
||||
urls.append((url, lineno, name))
|
||||
return urls
|
||||
|
||||
|
|
|
|||
|
|
@ -1,5 +0,0 @@
|
|||
<!-- base with href -->
|
||||
<base href="..">
|
||||
<!-- good file -->
|
||||
<a href="html/base1.html">
|
||||
|
||||
|
|
@ -1,4 +0,0 @@
|
|||
<!-- base with href -->
|
||||
<base href="..">
|
||||
<!-- just an url -->
|
||||
<a href="html/base1.html">
|
||||
|
|
@ -1,20 +1,5 @@
|
|||
test_base
|
||||
url file:///home/calvin/projects/linkchecker/test/html/base1.html
|
||||
valid
|
||||
url file:///home/calvin/projects/linkchecker/test/html/base2.html
|
||||
valid
|
||||
url file:///home/calvin/projects/linkchecker/test/html/base3.html
|
||||
valid
|
||||
url base2.html
|
||||
cached
|
||||
valid
|
||||
url base2.html
|
||||
cached
|
||||
valid
|
||||
url html/base1.html
|
||||
baseurl ..
|
||||
error
|
||||
url html/base1.html
|
||||
cached
|
||||
baseurl ..
|
||||
error
|
||||
|
|
|
|||
|
|
@ -18,6 +18,3 @@ valid
|
|||
url telnet://user:password@route-views.oregon-ix.net
|
||||
cached
|
||||
valid
|
||||
url http://localhost
|
||||
warning Missing '/' at end of URL
|
||||
valid
|
||||
|
|
|
|||
|
|
@ -7,7 +7,7 @@ config["anchors"] = 1
|
|||
config["verbose"] = 1
|
||||
config.disableThreading()
|
||||
htmldir = "test/html"
|
||||
for file in ('base1.html','base2.html','base3.html'):
|
||||
for file in ('base1.html',):
|
||||
url = os.path.join(htmldir, file)
|
||||
config.appendUrl(linkcheck.UrlData.GetUrlDataFrom(url, 0))
|
||||
linkcheck.checkUrls(config)
|
||||
|
|
|
|||
Loading…
Reference in a new issue