fix testing scripts

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@400 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2002-04-27 12:54:53 +00:00
parent dbc724e644
commit 3bb5d21955
6 changed files with 17 additions and 41 deletions

View file

@ -77,20 +77,23 @@ _linkMatcher = r"""
\s* # whitespace
%s # tag name
\s+ # whitespace
([^"'>]|"[^"]"|'[^']')*? # skip leading attributes
([^"'>]|"[^"]*"|'[^']*')*? # skip leading attributes
%s # attrib name
\s* # whitespace
= # equal sign
\s* # whitespace
(?P<value> # attribute value
"[^"]*" | # in double quotes
'[^']*' | # in single quotes
"[^"]*" | # in double quotes
'[^']*' | # in single quotes
[^\s>]+) # unquoted
([^"'>]|"[^"]"|'[^']')* # skip trailing attributes
([^"'>]|"[^"]*"|'[^']*')* # skip trailing attributes
> # close tag
"""
# The meta tag is disabled for now; the modified _linkMatcher pattern does not allow it.
# (['meta'], ['url']), # <meta http-equiv='refresh' content='x; url=...'>
# ripped mainly from HTML::Tagset.pm
LinkTags = (
(['a'], ['href']),
@ -111,11 +114,10 @@ LinkTags = (
(['isindex'], ['action']),
(['layer'], ['background', 'src']),
(['link'], ['href']),
(['meta'], ['url']), # <meta http-equiv='refresh' content='x; url=...'>
(['object'], ['classid', 'codebase', 'data', 'archive', 'usemap']),
(['q'], ['cite']),
(['script'], ['src', 'for']),
(['body', 'table', 'td', 'th', 'tr'], ['background']),
(['body', 'table', 'td', 'th', 'tr'], ['background']),
(['xmp'], ['href']),
)
@ -147,9 +149,9 @@ CommentPatternEnd = re.compile("--\s*>")
class UrlData:
"Representing a URL with additional information like validity etc"
def __init__(self,
urlName,
recursionLevel,
def __init__(self,
urlName,
recursionLevel,
parentName = None,
baseRef = None,
line = 0,
@ -453,8 +455,8 @@ class UrlData:
def searchInForTag(self, pattern):
debug(HURT_ME_PLENTY, "Searching for tag", pattern['tag'],
"attribute", pattern['attr'])
debug(HURT_ME_PLENTY, "Searching for tag", `pattern['tag']`,
"attribute", `pattern['attr']`)
urls = []
index = 0
while 1:
@ -462,14 +464,15 @@ class UrlData:
if not match: break
index = match.end()
if self.is_in_comment(match.start()): continue
# need to strip optional ending quotes for the meta tag
url = StringUtil.stripQuotes(match.group('value')).strip()
# strip quotes
url = StringUtil.stripQuotes(match.group('value'))
# need to resolve HTML entities
url = StringUtil.unhtmlify(url)
lineno= StringUtil.getLineNumber(self.getContent(), match.start())
# extra feature: get optional name for this bookmark
name = self.searchInForName(pattern['tag'], pattern['attr'],
match.start(), match.end())
debug(HURT_ME_PLENTY, "Found", `url`, "at line", lineno)
urls.append((url, lineno, name))
return urls

View file

@ -1,5 +0,0 @@
<!-- base with href -->
<base href="..">
<!-- good file -->
<a href="html/base1.html">

View file

@ -1,4 +0,0 @@
<!-- base with href -->
<base href="..">
<!-- just an url -->
<a href="html/base1.html">

View file

@ -1,20 +1,5 @@
test_base
url file:///home/calvin/projects/linkchecker/test/html/base1.html
valid
url file:///home/calvin/projects/linkchecker/test/html/base2.html
valid
url file:///home/calvin/projects/linkchecker/test/html/base3.html
valid
url base2.html
cached
valid
url base2.html
cached
valid
url html/base1.html
baseurl ..
error
url html/base1.html
cached
baseurl ..
error

View file

@ -18,6 +18,3 @@ valid
url telnet://user:password@route-views.oregon-ix.net
cached
valid
url http://localhost
warning Missing '/' at end of URL
valid

View file

@ -7,7 +7,7 @@ config["anchors"] = 1
config["verbose"] = 1
config.disableThreading()
htmldir = "test/html"
for file in ('base1.html','base2.html','base3.html'):
for file in ('base1.html',):
url = os.path.join(htmldir, file)
config.appendUrl(linkcheck.UrlData.GetUrlDataFrom(url, 0))
linkcheck.checkUrls(config)