only store parser contents in LinkFinder handler, not in all handlers

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@3602 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2007-11-14 19:06:39 +00:00
parent a1d911127b
commit 6a0960aa66
3 changed files with 8 additions and 10 deletions

2
TODO
View file

@ -1,5 +1,3 @@
- [OPTIMIZATION] Don't store content in TagFinder, only in LinkFinder
- [BUG REPORT] Running on Windows XP with threads and a local HTTP Server
yields a lot of (10061 'Connection Refused') errors. Running without threads
(-t0) gets rid of these errors. Is it the server?

View file

@ -521,7 +521,7 @@ class UrlBase (object):
if not (self.is_http() or self.is_file()):
return True
# construct parser object
handler = linkcheck.linkparse.MetaRobotsFinder(self.get_content())
handler = linkcheck.linkparse.MetaRobotsFinder()
parser = linkcheck.HtmlParser.htmlsax.parser(handler)
handler.parser = parser
# parse

View file

@ -77,12 +77,11 @@ class TagFinder (object):
TagFinder instances are to be used as HtmlParser handlers.
"""
def __init__ (self, content):
def __init__ (self):
"""
Store content in buffer.
Initialize local variables.
"""
super(TagFinder, self).__init__()
self.content = content
# parser object will be initialized when it is used as
# a handler object
self.parser = None
@ -106,11 +105,11 @@ class MetaRobotsFinder (TagFinder):
Class for finding robots.txt meta values in HTML.
"""
def __init__ (self, content):
def __init__ (self):
"""
Store content in buffer and initialize flags.
Initialize flags.
"""
super(MetaRobotsFinder, self).__init__(content)
super(MetaRobotsFinder, self).__init__()
self.follow = True
self.index = True
assert None == linkcheck.log.debug(linkcheck.LOG_CHECK,
@ -153,7 +152,8 @@ class LinkFinder (TagFinder):
"""
Store content in buffer and initialize URL list.
"""
super(LinkFinder, self).__init__(content)
super(LinkFinder, self).__init__()
self.content = content
if tags is None:
self.tags = LinkTags
else: