diff --git a/ChangeLog b/ChangeLog index 96254518..173d856a 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,10 @@ +1.6.7 + * Removed check for tags codebase attribute, but honor it + when checking applet links + * Handle tags archive attribute as a comma separated list + * Fix a deep flaw in tag searching, which ignored tags with more + than one link attribute in it. + 1.6.6 * Use the new HTTPConnection/HTTPResponse interface of httplib Closes: SF bug #634679 diff --git a/linkcheck/UrlData.py b/linkcheck/UrlData.py index 1639a391..030ab129 100644 --- a/linkcheck/UrlData.py +++ b/linkcheck/UrlData.py @@ -100,7 +100,7 @@ _linkMatcher = r""" # ripped mainly from HTML::Tagset.pm LinkTags = ( (['a'], ['href']), - (['applet'], ['archive', 'codebase', 'src']), + (['applet'], ['archive', 'src']), (['area'], ['href']), (['bgsound'], ['src']), (['blockquote'], ['cite']), @@ -117,7 +117,7 @@ LinkTags = ( (['isindex'], ['action']), (['layer'], ['background', 'src']), (['link'], ['href']), - (['object'], ['classid', 'codebase', 'data', 'archive', 'usemap']), + (['object'], ['classid', 'data', 'archive', 'usemap']), (['q'], ['cite']), (['script'], ['src', 'for']), (['body', 'table', 'td', 'th', 'tr'], ['background']), @@ -131,11 +131,11 @@ _refresh_re = re.compile(r"(?i)^\d+;\s*url=(?P.+)$") LinkPatterns = [] for _tags,_attrs in LinkTags: _tag = '(%s)'%'|'.join(_tags) - _attr = '(%s)'%'|'.join(_attrs) - LinkPatterns.append({'pattern': re.compile(_linkMatcher % (_tag, _attr), - re.VERBOSE), - 'tags': _tags, - 'attrs': _attrs}) + for _attr in _attrs: + LinkPatterns.append({'pattern': re.compile(_linkMatcher % + (_tag, _attr), re.VERBOSE), + 'tags': _tags, + 'attrs': _attrs}) AnchorPattern = { 'pattern': re.compile(_linkMatcher % ("a", "name"), re.VERBOSE), 'tags': ['a'], @@ -148,8 +148,6 @@ BasePattern = { 'attrs': ['href'], } -#CommentPattern = re.compile(" 1 2 -3 +3 4 6 5 diff --git a/test/output/test_base b/test/output/test_base index c9611c03..a1eea9dd 100644 --- a/test/output/test_base +++ b/test/output/test_base @@ -1,8 +1,19 @@ test_base url file:///home/calvin/projects/linkchecker/test/html/base1.html valid +url file:///home/calvin/projects/linkchecker/test/html/base2.html +valid +url file:///home/calvin/projects/linkchecker/test/html/codebase.html +valid url misc.html valid url misc.html cached valid +url test.txt +baseurl file:///home/calvin/projects/linkchecker/test/html/base/ +valid +url test.txt +cached +baseurl file:///home/calvin/projects/linkchecker/test/html/base/ +valid diff --git a/test/output/test_ftp b/test/output/test_ftp index 8471297a..3dd94331 100644 --- a/test/output/test_ftp +++ b/test/output/test_ftp @@ -4,13 +4,13 @@ valid url ftp:/ftp.debian.org/pub error url ftp://ftp.debian.org/pub -info 220 saens.debian.org FTP server (vsftpd) +info 220 raff.debian.org FTP server (vsftpd) valid url ftp://ftp.debian.org//pub -info 220 saens.debian.org FTP server (vsftpd) +info 220 raff.debian.org FTP server (vsftpd) valid url ftp://ftp.debian.org////////pub -info 220 saens.debian.org FTP server (vsftpd) +info 220 raff.debian.org FTP server (vsftpd) valid url ftp:///ftp.debian.org/pub cached diff --git a/test/output/test_mail b/test/output/test_mail index 419dce22..f3c66950 100644 --- a/test/output/test_mail +++ b/test/output/test_mail @@ -8,9 +8,8 @@ valid url mailto:Dude , Killer ?subject=bla name 2 valid -url mailto:Bastian Kleineidam ?bcc=jsmith%40company.com +url mailto:Bastian Kleineidam ?bcc=jsmith%40wummel.company.com name 3 -warning No MX mail host for company.com found valid url mailto:Bastian Kleineidam name 4 diff --git a/test/test_base.py b/test/test_base.py index cce45125..a6fb5c0f 100644 --- a/test/test_base.py +++ b/test/test_base.py @@ -7,7 +7,7 @@ config["anchors"] = 1 config["verbose"] = 1 config.disableThreading() htmldir = "test/html" -for file in ('base1.html',): +for file in ('base1.html','base2.html', 'codebase.html'): url = os.path.join(htmldir, file) config.appendUrl(linkcheck.UrlData.GetUrlDataFrom(url, 0, config)) linkcheck.checkUrls(config)