diff --git a/linkcheck/HttpUrlData.py b/linkcheck/HttpUrlData.py index 7baab373..4582fea5 100644 --- a/linkcheck/HttpUrlData.py +++ b/linkcheck/HttpUrlData.py @@ -16,13 +16,13 @@ # along with this program; if not, write to the Free Software # Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. -import urlparse, sys, time, re, httplib, robotparser +import urlparse, sys, time, re, httplib, robotparser2 from urllib import quote, unquote import Config, i18n from debug import * # XXX not dynamic if get_debuglevel() > 0: - robotparser.debug = 1 + robotparser2.debug = 1 from ProxyUrlData import ProxyUrlData from UrlData import ExcList, GetUrlDataFrom supportHttps = hasattr(httplib, "HTTPSConnection") @@ -378,7 +378,7 @@ class HttpUrlData (ProxyUrlData): debug(HURT_ME_PLENTY, "robots.txt url", roboturl) debug(HURT_ME_PLENTY, "url", self.url) if not self.config.robotsTxtCache_has_key(roboturl): - rp = robotparser.RobotFileParser() + rp = robotparser2.RobotFileParser() rp.set_url(roboturl) rp.read() self.config.robotsTxtCache_set(roboturl, rp) diff --git a/linkcheck/UrlData.py b/linkcheck/UrlData.py index 265e9a5f..52f00e4e 100644 --- a/linkcheck/UrlData.py +++ b/linkcheck/UrlData.py @@ -30,6 +30,8 @@ import Config, StringUtil, test_support from linkparse import LinkParser from debug import * +ws_at_start_or_end = re.compile(r"(^\s+)|(\s+$)").search + # helper function for internal errors def internal_error (): print >>sys.stderr, i18n._("""\n********** Oops, I did it again. ************* @@ -303,6 +305,10 @@ class UrlData (object): self.setError(i18n._("URL is null or empty")) self.logMe() return + if ws_at_start_or_end(self.urlName): + self.setError(i18n._("URL has whitespace at beginning or end")) + self.logMe() + return try: self.buildUrl() self.extern = self._getExtern()