diff --git a/linkcheck/UrlData.py b/linkcheck/UrlData.py
index 52f00e4e..f8712833 100644
--- a/linkcheck/UrlData.py
+++ b/linkcheck/UrlData.py
@@ -432,7 +432,7 @@ class UrlData (object):
debug(HURT_ME_PLENTY, "checking anchor", anchor)
if not (self.valid and anchor and self.isHtml() and self.hasContent()):
return
- h = LinkParser(self.getContent(), {'a': ['name'], None: ['id']})
+ h = LinkParser(self.getContent(), tags={'a': ['name'], None: ['id']})
for cur_anchor,line,column,name,base in h.urls:
if cur_anchor == anchor:
return
@@ -526,7 +526,7 @@ class UrlData (object):
def parse_html (self):
# search for a possible base reference
- h = LinkParser(self.getContent(), {'base': ['href']})
+ h = LinkParser(self.getContent(), tags={'base': ['href']})
baseRef = None
if len(h.urls)>=1:
baseRef = h.urls[0][0]
diff --git a/linkcheck/linkparse.py b/linkcheck/linkparse.py
index 6be9ce05..6d539a36 100644
--- a/linkcheck/linkparse.py
+++ b/linkcheck/linkparse.py
@@ -49,10 +49,12 @@ LinkTags = {
'th': ['background'],
'tr': ['background'],
'xmp': ['href'],
+ None: ['style'],
}
# matcher for tags
_refresh_re = re.compile(r"(?i)^\d+;\s*url=(?P<url>.+)$")
+_style_background_re = re.compile(r"background-image:\s*url\((?P<url>.+?)\)")
class LinkParser (HtmlParser):
"""Parse the content for a list of links. After parsing, the urls
@@ -76,8 +78,9 @@ class LinkParser (HtmlParser):
debug(NIGHTMARE, "LinkParser tag", tag, "attrs", attrs)
debug(NIGHTMARE, "line", self.lineno(), "col", self.column(),
"old line", self.last_lineno(), "old col", self.last_column())
- tags = self.tags.get(tag, self.tags.get(None, []))
- for attr in tags:
+ # build a new list; extend() on the looked-up list would grow self.tags
+ tagattrs = self.tags.get(tag, []) + self.tags.get(None, [])
+ for attr in tagattrs:
if attr in attrs:
# name of this link
if tag=='a' and attr=='href':
@@ -86,6 +89,8 @@ class LinkParser (HtmlParser):
name = linkname.href_name(self.content[self.pos():])
elif tag=='img':
name = StringUtil.unquote(attrs.get('alt', ''))
+ if not name:
+ name = StringUtil.unquote(attrs.get('title', ''))
else:
name = ""
# possible codebase
@@ -93,21 +98,27 @@ class LinkParser (HtmlParser):
base = StringUtil.unquote(attrs.get('codebase'))
else:
base = ""
- # add link to url list
value = StringUtil.unquote(attrs[attr])
+ # add link to url list
self.addLink(tag, attr, value, name, base)
def addLink (self, tag, attr, url, name, base):
- debug(NIGHTMARE, "LinkParser add link", tag, attr, url, name, base)
# look for meta refresh
if tag=='meta':
- metamatch = _refresh_re.match(url)
- if metamatch:
- url = metamatch.group("url")
+ mo = _refresh_re.match(url)
+ if mo:
+ url = mo.group("url")
else:
# only meta refresh has an url, so return
return
+ elif attr=='style':
+ mo = _style_background_re.search(url)
+ if mo:
+ url = mo.group("url")
+ else:
+ return
+ debug(NIGHTMARE, "LinkParser add link", tag, attr, url, name, base)
self.urls.append((url, self.last_lineno(), self.last_column(),
name, base))