From e3af24186b9fb861421a030a2c9f334d814874c4 Mon Sep 17 00:00:00 2001 From: calvin Date: Wed, 25 Aug 2004 18:24:07 +0000 Subject: [PATCH] remove whitespace check, it is not needed now the url is always quoted git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1487 e7d03fd6-7b0d-0410-9947-9c21f3af8025 --- linkcheck/checker/urlbase.py | 28 ++++++++-------------------- 1 file changed, 8 insertions(+), 20 deletions(-) diff --git a/linkcheck/checker/urlbase.py b/linkcheck/checker/urlbase.py index 17fd67f0..fc071bdc 100644 --- a/linkcheck/checker/urlbase.py +++ b/linkcheck/checker/urlbase.py @@ -37,9 +37,6 @@ import linkcheck.HtmlParser.htmlsax from linkcheck.i18n import _ -ws_at_start_or_end = re.compile(r"(^\s+)|(\s+$)").search - - def internal_error (): """print internal error message to stderr""" print >> sys.stderr, os.linesep @@ -197,9 +194,7 @@ class UrlBase (object): return self.consumer.cache.url_is_cached(self.get_cache_key()) def get_cache_key (self): - """Get key to store this url data in the cache. Note that - this method is only called after self.build_url() succeeds. - """ + """Get key to store this url data in the cache.""" assert self.anchor is not None if self.urlparts: if self.consumer.config["anchorcaching"]: @@ -231,10 +226,12 @@ class UrlBase (object): self.urlparts[1] = host.lower() # safe anchor for later checking self.anchor = self.urlparts[4] - x, port = urllib.splitport(host) - if port is not None and not linkcheck.url.is_numeric_port(port): - raise linkcheck.LinkCheckerError(_("URL has invalid port %r") %\ - str(port)) + self.host, self.port = urllib.splitport(host) + if self.port is not None: + if not linkcheck.url.is_numeric_port(self.port): + raise linkcheck.LinkCheckerError( + _("URL has invalid port %r") % str(self.port)) + self.port = int(self.port) def check (self): try: @@ -324,27 +321,18 @@ class UrlBase (object): def check_syntax (self): """Called before self.check(), this function inspects the url syntax. Success enables further checking, failure - immediately logs this url. This syntax check must not + immediately logs this url. Syntax checks must not use any network resources. """ linkcheck.log.debug(linkcheck.LOG_CHECK, "checking syntax") if not self.base_url: self.set_result(_("URL is empty"), valid=False) - self.consumer.logger_new_url(self) - return False - if ws_at_start_or_end(self.base_url): - # leading or trailing whitespace is common, so make a - # separate error message for this - self.set_result(_("URL has whitespace at beginning or end"), - valid=False) - self.consumer.logger_new_url(self) return False try: self.build_url() self.extern = self._get_extern() except linkcheck.LinkCheckerError, msg: self.set_result(str(msg), valid=False) - self.consumer.logger_new_url(self) return False return True