mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-01 19:34:43 +00:00
optimize link name search
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@2170 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
4f22afb93b
commit
cf30d1ae05
2 changed files with 10 additions and 1 deletion
|
|
@ -28,6 +28,12 @@
|
|||
Type: feature
|
||||
Changed: linkchecker
|
||||
|
||||
* When searching for link names, limit the amount of data to look at
|
||||
to 500 characters. Do not look at the complete content anymore.
|
||||
This speeds up parsing of big HTML files significantly.
|
||||
Type: optimization
|
||||
Changed: linkcheck/linkparse.py
|
||||
|
||||
2.1 "Shogun Assassin" (released 11.1.2005)
|
||||
|
||||
* Added XHTML support to the HTML parser.
|
||||
|
|
|
|||
|
|
@ -190,7 +190,10 @@ class LinkFinder (TagFinder):
|
|||
if tag == 'a' and attr == 'href':
|
||||
name = linkcheck.strformat.unquote(attrs.get('title', u''))
|
||||
if not name:
|
||||
data = self.content[self.parser.pos():]
|
||||
pos = self.parser.pos()
|
||||
# Look for name only up to 500 characters from current
|
||||
# position, to limit the amount of data to decode.
|
||||
data = self.content[pos:pos+500]
|
||||
data = data.decode(self.parser.encoding, "ignore")
|
||||
name = linkcheck.linkname.href_name(data)
|
||||
elif tag == 'img':
|
||||
|
|
|
|||
Loading…
Reference in a new issue