Check CSS background-image URLs in style attributes

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1052 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2003-10-16 20:39:59 +00:00
parent d450d06167
commit 8f9e0d7a97
2 changed files with 20 additions and 9 deletions

View file

@@ -432,7 +432,7 @@ class UrlData (object):
debug(HURT_ME_PLENTY, "checking anchor", anchor)
if not (self.valid and anchor and self.isHtml() and self.hasContent()):
return
h = LinkParser(self.getContent(), {'a': ['name'], None: ['id']})
h = LinkParser(self.getContent(), tags={'a': ['name'], None: ['id']})
for cur_anchor,line,column,name,base in h.urls:
if cur_anchor == anchor:
return
@@ -526,7 +526,7 @@ class UrlData (object):
def parse_html (self):
# search for a possible base reference
h = LinkParser(self.getContent(), {'base': ['href']})
h = LinkParser(self.getContent(), tags={'base': ['href']})
baseRef = None
if len(h.urls)>=1:
baseRef = h.urls[0][0]

View file

@@ -49,10 +49,12 @@ LinkTags = {
'th': ['background'],
'tr': ['background'],
'xmp': ['href'],
None: ['style'],
}
# matcher for <meta http-equiv=refresh> tags
_refresh_re = re.compile(r"(?i)^\d+;\s*url=(?P<url>.+)$")
_style_background_re = re.compile(r"background-image:\s*url\((?P<url>.+?)\)")
class LinkParser (HtmlParser):
"""Parse the content for a list of links. After parsing, the urls
@@ -76,8 +78,9 @@ class LinkParser (HtmlParser):
debug(NIGHTMARE, "LinkParser tag", tag, "attrs", attrs)
debug(NIGHTMARE, "line", self.lineno(), "col", self.column(),
"old line", self.last_lineno(), "old col", self.last_column())
tags = self.tags.get(tag, self.tags.get(None, []))
for attr in tags:
tagattrs = self.tags.get(tag, [])
tagattrs.extend(self.tags.get(None, []))
for attr in tagattrs:
if attr in attrs:
# name of this link
if tag=='a' and attr=='href':
@@ -86,6 +89,8 @@ class LinkParser (HtmlParser):
name = linkname.href_name(self.content[self.pos():])
elif tag=='img':
name = StringUtil.unquote(attrs.get('alt', ''))
if not name:
name = StringUtil.unquote(attrs.get('title', ''))
else:
name = ""
# possible codebase
@@ -93,21 +98,27 @@ class LinkParser (HtmlParser):
base = StringUtil.unquote(attrs.get('codebase'))
else:
base = ""
# add link to url list
value = StringUtil.unquote(attrs[attr])
# add link to url list
self.addLink(tag, attr, value, name, base)
def addLink (self, tag, attr, url, name, base):
debug(NIGHTMARE, "LinkParser add link", tag, attr, url, name, base)
# look for meta refresh
if tag=='meta':
metamatch = _refresh_re.match(url)
if metamatch:
url = metamatch.group("url")
mo = _refresh_re.match(url)
if mo:
url = mo.group("url")
else:
# only meta refresh has an url, so return
return
elif attr=='style':
mo = _style_background_re.search(url)
if mo:
url = mo.group("url")
else:
return
debug(NIGHTMARE, "LinkParser add link", tag, attr, url, name, base)
self.urls.append((url, self.last_lineno(), self.last_column(),
name, base))