mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-10 23:53:11 +00:00
check CSS background image urls
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1052 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
d450d06167
commit
8f9e0d7a97
2 changed files with 20 additions and 9 deletions
|
|
@ -432,7 +432,7 @@ class UrlData (object):
|
|||
debug(HURT_ME_PLENTY, "checking anchor", anchor)
|
||||
if not (self.valid and anchor and self.isHtml() and self.hasContent()):
|
||||
return
|
||||
h = LinkParser(self.getContent(), {'a': ['name'], None: ['id']})
|
||||
h = LinkParser(self.getContent(), tags={'a': ['name'], None: ['id']})
|
||||
for cur_anchor,line,column,name,base in h.urls:
|
||||
if cur_anchor == anchor:
|
||||
return
|
||||
|
|
@ -526,7 +526,7 @@ class UrlData (object):
|
|||
|
||||
def parse_html (self):
|
||||
# search for a possible base reference
|
||||
h = LinkParser(self.getContent(), {'base': ['href']})
|
||||
h = LinkParser(self.getContent(), tags={'base': ['href']})
|
||||
baseRef = None
|
||||
if len(h.urls)>=1:
|
||||
baseRef = h.urls[0][0]
|
||||
|
|
|
|||
|
|
@ -49,10 +49,12 @@ LinkTags = {
|
|||
'th': ['background'],
|
||||
'tr': ['background'],
|
||||
'xmp': ['href'],
|
||||
None: ['style'],
|
||||
}
|
||||
|
||||
# matcher for <meta http-equiv=refresh> tags
|
||||
_refresh_re = re.compile(r"(?i)^\d+;\s*url=(?P<url>.+)$")
|
||||
_style_background_re = re.compile(r"background-image:\s*url\((?P<url>.+?)\)")
|
||||
|
||||
class LinkParser (HtmlParser):
|
||||
"""Parse the content for a list of links. After parsing, the urls
|
||||
|
|
@ -76,8 +78,9 @@ class LinkParser (HtmlParser):
|
|||
debug(NIGHTMARE, "LinkParser tag", tag, "attrs", attrs)
|
||||
debug(NIGHTMARE, "line", self.lineno(), "col", self.column(),
|
||||
"old line", self.last_lineno(), "old col", self.last_column())
|
||||
tags = self.tags.get(tag, self.tags.get(None, []))
|
||||
for attr in tags:
|
||||
tagattrs = self.tags.get(tag, [])
|
||||
tagattrs.extend(self.tags.get(None, []))
|
||||
for attr in tagattrs:
|
||||
if attr in attrs:
|
||||
# name of this link
|
||||
if tag=='a' and attr=='href':
|
||||
|
|
@ -86,6 +89,8 @@ class LinkParser (HtmlParser):
|
|||
name = linkname.href_name(self.content[self.pos():])
|
||||
elif tag=='img':
|
||||
name = StringUtil.unquote(attrs.get('alt', ''))
|
||||
if not name:
|
||||
name = StringUtil.unquote(attrs.get('title', ''))
|
||||
else:
|
||||
name = ""
|
||||
# possible codebase
|
||||
|
|
@ -93,21 +98,27 @@ class LinkParser (HtmlParser):
|
|||
base = StringUtil.unquote(attrs.get('codebase'))
|
||||
else:
|
||||
base = ""
|
||||
# add link to url list
|
||||
value = StringUtil.unquote(attrs[attr])
|
||||
# add link to url list
|
||||
self.addLink(tag, attr, value, name, base)
|
||||
|
||||
|
||||
def addLink (self, tag, attr, url, name, base):
|
||||
debug(NIGHTMARE, "LinkParser add link", tag, attr, url, name, base)
|
||||
# look for meta refresh
|
||||
if tag=='meta':
|
||||
metamatch = _refresh_re.match(url)
|
||||
if metamatch:
|
||||
url = metamatch.group("url")
|
||||
mo = _refresh_re.match(url)
|
||||
if mo:
|
||||
url = mo.group("url")
|
||||
else:
|
||||
# only meta refresh has an url, so return
|
||||
return
|
||||
elif attr=='style':
|
||||
mo = _style_background_re.search(url)
|
||||
if mo:
|
||||
url = mo.group("url")
|
||||
else:
|
||||
return
|
||||
debug(NIGHTMARE, "LinkParser add link", tag, attr, url, name, base)
|
||||
self.urls.append((url, self.last_lineno(), self.last_column(),
|
||||
name, base))
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue