optimize link name search

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@2170 e7d03fd6-7b0d-0410-9947-9c21f3af8025
2026-05-01 19:34:43 +00:00 · 2005-01-20 09:32:24 +00:00 · 2005-01-20 09:32:24 +00:00 · cf30d1ae05
commit cf30d1ae05
parent 4f22afb93b
2 changed files with 10 additions and 1 deletions
--- a/6
+++ b/6
@ -28,6 +28,12 @@
    Type: feature
    Changed: linkchecker

+  * When searching for link names, limit the amount of data to look at
+    to 500 characters. Do not look at the complete content anymore.
+    This speeds up parsing of big HTML files significantly.
+    Type: optimization
+    Changed: linkcheck/linkparse.py
+
 2.1 "Shogun Assassin" (released 11.1.2005)

  * Added XHTML support to the HTML parser.
--- a/linkcheck/linkparse.py
+++ b/linkcheck/linkparse.py
@ -190,7 +190,10 @@ class LinkFinder (TagFinder):
        if tag == 'a' and attr == 'href':
            name = linkcheck.strformat.unquote(attrs.get('title', u''))
            if not name:
-                data = self.content[self.parser.pos():]
+                pos = self.parser.pos()
+                # Look for name only up to 500 characters from current
+                # position, to limit the amount of data to decode.
+                data = self.content[pos:pos+500]
                data = data.decode(self.parser.encoding, "ignore")
                name = linkcheck.linkname.href_name(data)
        elif tag == 'img':