mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-30 20:50:27 +00:00
use new robotparser2
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1042 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
c4d243dfc7
commit
c744aa56fc
2 changed files with 9 additions and 3 deletions
|
|
@@ -16,13 +16,13 @@
|
|||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||||
|
||||
import urlparse, sys, time, re, httplib, robotparser
|
||||
import urlparse, sys, time, re, httplib, robotparser2
|
||||
from urllib import quote, unquote
|
||||
import Config, i18n
|
||||
from debug import *
|
||||
# XXX not dynamic
|
||||
if get_debuglevel() > 0:
|
||||
robotparser.debug = 1
|
||||
robotparser2.debug = 1
|
||||
from ProxyUrlData import ProxyUrlData
|
||||
from UrlData import ExcList, GetUrlDataFrom
|
||||
supportHttps = hasattr(httplib, "HTTPSConnection")
|
||||
|
|
@@ -378,7 +378,7 @@ class HttpUrlData (ProxyUrlData):
|
|||
debug(HURT_ME_PLENTY, "robots.txt url", roboturl)
|
||||
debug(HURT_ME_PLENTY, "url", self.url)
|
||||
if not self.config.robotsTxtCache_has_key(roboturl):
|
||||
rp = robotparser.RobotFileParser()
|
||||
rp = robotparser2.RobotFileParser()
|
||||
rp.set_url(roboturl)
|
||||
rp.read()
|
||||
self.config.robotsTxtCache_set(roboturl, rp)
|
||||
|
|
|
|||
|
|
@@ -30,6 +30,8 @@ import Config, StringUtil, test_support
|
|||
from linkparse import LinkParser
|
||||
from debug import *
|
||||
|
||||
ws_at_start_or_end = re.compile(r"(^\s+)|(\s+$)").search
|
||||
|
||||
# helper function for internal errors
|
||||
def internal_error ():
|
||||
print >>sys.stderr, i18n._("""\n********** Oops, I did it again. *************
|
||||
|
|
@@ -303,6 +305,10 @@ class UrlData (object):
|
|||
self.setError(i18n._("URL is null or empty"))
|
||||
self.logMe()
|
||||
return
|
||||
if ws_at_start_or_end(self.urlName):
|
||||
self.setError(i18n._("URL has whitespace at beginning or end"))
|
||||
self.logMe()
|
||||
return
|
||||
try:
|
||||
self.buildUrl()
|
||||
self.extern = self._getExtern()
|
||||
|
|
|
|||
Loading…
Reference in a new issue