use new robotparser2

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1042 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2003-09-23 22:00:14 +00:00
parent c4d243dfc7
commit c744aa56fc
2 changed files with 9 additions and 3 deletions

View file

@ -16,13 +16,13 @@
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import urlparse, sys, time, re, httplib, robotparser
import urlparse, sys, time, re, httplib, robotparser2
from urllib import quote, unquote
import Config, i18n
from debug import *
# XXX not dynamic
if get_debuglevel() > 0:
robotparser.debug = 1
robotparser2.debug = 1
from ProxyUrlData import ProxyUrlData
from UrlData import ExcList, GetUrlDataFrom
supportHttps = hasattr(httplib, "HTTPSConnection")
@ -378,7 +378,7 @@ class HttpUrlData (ProxyUrlData):
debug(HURT_ME_PLENTY, "robots.txt url", roboturl)
debug(HURT_ME_PLENTY, "url", self.url)
if not self.config.robotsTxtCache_has_key(roboturl):
rp = robotparser.RobotFileParser()
rp = robotparser2.RobotFileParser()
rp.set_url(roboturl)
rp.read()
self.config.robotsTxtCache_set(roboturl, rp)

View file

@ -30,6 +30,8 @@ import Config, StringUtil, test_support
from linkparse import LinkParser
from debug import *
ws_at_start_or_end = re.compile(r"(^\s+)|(\s+$)").search
# helper function for internal errors
def internal_error ():
print >>sys.stderr, i18n._("""\n********** Oops, I did it again. *************
@ -303,6 +305,10 @@ class UrlData (object):
self.setError(i18n._("URL is null or empty"))
self.logMe()
return
if ws_at_start_or_end(self.urlName):
self.setError(i18n._("URL has whitespace at beginning or end"))
self.logMe()
return
try:
self.buildUrl()
self.extern = self._getExtern()