Prefer anchor element content as name instead of title attribute.

This commit is contained in:
Bastian Kleineidam 2010-07-30 21:03:04 +02:00
parent c086f49cea
commit 76f7f6b6a3
2 changed files with 8 additions and 5 deletions

View file

@@ -24,6 +24,9 @@ Changes:
Closes: SF bug #1575800
- checking: Add "skype:" to list of ignored URL schemes.
Closes: SF bug #2989086
- logging: Prefer the <a> element content as name instead of the title
attribute.
Closes: SF bug #3023483
Features:
- ftp: Detect and support UTF-8 filename encoding capability of FTP

View file

@@ -203,12 +203,12 @@ class LinkFinder (TagFinder):
def get_link_name (self, tag, attrs, attr):
"""Parse attrs for link name. Return name of link."""
if tag == 'a' and attr == 'href':
name = unquote(attrs.get_true('title', u''))
# Look for name only up to MAX_NAMELEN characters
data = self.parser.peek(MAX_NAMELEN)
data = data.decode(self.parser.encoding, "ignore")
name = linkname.href_name(data)
if not name:
# Look for name only up to MAX_NAMELEN characters
data = self.parser.peek(MAX_NAMELEN)
data = data.decode(self.parser.encoding, "ignore")
name = linkname.href_name(data)
name = unquote(attrs.get_true('title', u''))
elif tag == 'img':
name = unquote(attrs.get_true('alt', u''))
if not name: