diff --git a/linkcheck/HtmlParser/htmllib.py b/linkcheck/HtmlParser/htmllib.py
index 75c6a4ec..0a8980e3 100644
--- a/linkcheck/HtmlParser/htmllib.py
+++ b/linkcheck/HtmlParser/htmllib.py
@@ -89,7 +89,7 @@ class HtmlPrettyPrinter (object):
"""
self.fd.write("" % data)
- def start_element (self, tag, attrs):
+ def start_element (self, tag, attrs, element_text=None):
"""
Print HTML start element.
diff --git a/linkcheck/htmlutil/linkparse.py b/linkcheck/htmlutil/linkparse.py
index f2c2909d..20d45a38 100644
--- a/linkcheck/htmlutil/linkparse.py
+++ b/linkcheck/htmlutil/linkparse.py
@@ -23,8 +23,6 @@ from .. import strformat, log, LOG_CHECK, url as urlutil
from . import linkname
from builtins import str as str_text
-MAX_NAMELEN = 256
-
unquote = strformat.unquote
# HTML4/5 link tags
@@ -130,7 +128,7 @@ class MetaRobotsFinder (TagFinder):
log.debug(LOG_CHECK, "meta robots finder")
self.follow = self.index = True
- def start_element (self, tag, attrs):
+ def start_element (self, tag, attrs, element_text=None):
"""Search for meta robots.txt "nofollow" and "noindex" flags."""
if tag == 'meta' and attrs.get('name') == 'robots':
val = attrs.get_true('content', u'').lower().split(u',')
@@ -180,7 +178,7 @@ class LinkFinder (TagFinder):
self.tags[tag].update(self.universal_attrs)
self.base_ref = u''
- def start_element (self, tag, attrs):
+ def start_element (self, tag, attrs, element_text=None):
"""Search for links and store found URLs in a list."""
log.debug(LOG_CHECK, "LinkFinder tag %s attrs %s", tag, attrs)
log.debug(LOG_CHECK, "line %d col %d old line %d old col %d", self.parser.lineno(), self.parser.column(), self.parser.last_lineno(), self.parser.last_column())
@@ -194,7 +192,7 @@ class LinkFinder (TagFinder):
if tag == "form" and not is_form_get(attr, attrs):
continue
# name of this link
- name = self.get_link_name(tag, attrs, attr)
+ name = self.get_link_name(tag, attrs, attr, element_text)
# possible codebase
base = u''
if tag == 'applet':
@@ -211,13 +209,9 @@ class LinkFinder (TagFinder):
self.parse_tag(tag, attr, value, name, base)
log.debug(LOG_CHECK, "LinkFinder finished tag %s", tag)
- def get_link_name (self, tag, attrs, attr):
+ def get_link_name (self, tag, attrs, attr, name=None):
"""Parse attrs for link name. Return name of link."""
if tag == 'a' and attr == 'href':
- # Look for name only up to MAX_NAMELEN characters
- data = self.parser.peek(MAX_NAMELEN)
- data = data.decode(self.parser.encoding, "ignore")
- name = linkname.href_name(data)
if not name:
name = attrs.get_true('title', u'')
elif tag == 'img':
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 49b40f0d..0851b3c6 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -282,21 +282,6 @@ class TestParser (unittest.TestCase):
self.assertEqual(resolve("%d;" % ord(c)), c)
self.assertEqual(resolve(""), u"")
- def test_peek (self):
- # Test peek() parser function
- data = 'name'
-
- class NamePeeker (object):
-
- def start_element (self_handler, tag, attrs):
- # use self reference of TestParser instance
- self.assertRaises(TypeError, self.htmlparser.peek, -1)
- self.assertEqual(self.htmlparser.peek(0), "")
- self.assertEqual(self.htmlparser.peek(4), "name")
-
- self.htmlparser.handler = NamePeeker()
- self.htmlparser.feed(data)
-
def test_encoding_detection (self):
html = ''
self.encoding_test(html, "utf-8")