From e1e713036bc0dedd5553d62dd10e46af9db79d34 Mon Sep 17 00:00:00 2001 From: calvin Date: Mon, 7 Mar 2005 13:57:01 +0000 Subject: [PATCH] use noproxyfor config values to ignore proxy settings git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@2360 e7d03fd6-7b0d-0410-9947-9c21f3af8025 --- ChangeLog | 17 ++++++++++--- linkcheck/checker/httpurl.py | 13 +++++++--- linkcheck/checker/proxysupport.py | 42 ++++++++++++++++++++++--------- 3 files changed, 52 insertions(+), 20 deletions(-) diff --git a/ChangeLog b/ChangeLog index 5e2738e7..fdbf5e4e 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,16 +1,25 @@ 2.6 "" (released xx.xx.xxxx) - * New option --low-priority to decrease the priority of spawned - threads. + * New option --low-priority to decrease the priority of spawned threads. Type: feature Changed: linkchecker, linkcheck/threader.py - * If GeoIP Python wrapper is installed, log the country name as - info. + * If GeoIP Python wrapper is installed, log the country name as info. Type: feature Changed: linkcheck/checker/consumer.py Added: linkcheck/checker/geoip.py + * New option --no-proxy-for that lets linkchecker contact the given + hosts directly instead of going through a proxy. + Also configurable in linkcheckerrc. + Type: feature + Changed: linkchecker, linkcheck/checker/proxysupport.py, + linkcheck/configuration.py + + * Syntax errors in regular expressions give a useful error message. + Type: bugfix + Changed: linkchecker, linkcheck/configuration.py + 2.5 "Spanglish" (released 4.3.2005) * Added spanish translation, thanks to Servilio Afre Puentes. 
diff --git a/linkcheck/checker/httpurl.py b/linkcheck/checker/httpurl.py index 529d91a6..4a331f72 100644 --- a/linkcheck/checker/httpurl.py +++ b/linkcheck/checker/httpurl.py @@ -191,8 +191,6 @@ class HttpUrl (urlbase.UrlBase, proxysupport.ProxySupport): """ # set the proxy, so a 407 status after this is an error self.set_proxy(self.consumer.config["proxy"].get(self.scheme)) - if self.proxy: - self.add_info(_("Using Proxy %r.") % self.proxy) self.headers = None self.auth = None self.cookies = [] @@ -232,10 +230,17 @@ class HttpUrl (urlbase.UrlBase, proxysupport.ProxySupport): # proxy enforcement (overrides standard proxy) if response.status == 305 and self.headers: oldproxy = (self.proxy, self.proxyauth) - self.set_proxy(self.headers.getheader("Location")) - self.add_info(_("Enforced Proxy %r.") % self.proxy) + newproxy = self.headers.getheader("Location") + self.add_info(_("Enforced proxy %r.") % newproxy) + self.set_proxy(newproxy) + if not self.proxy: + self.set_result( + _("Enforced proxy %r ignored, aborting.") % newproxy, + valid=False) + return response = self._get_http_response() self.headers = response.msg + # restore old proxy settings self.proxy, self.proxyauth = oldproxy try: tries, response = self.follow_redirections(response) diff --git a/linkcheck/checker/proxysupport.py b/linkcheck/checker/proxysupport.py index f70387f5..a9d8d7e5 100644 --- a/linkcheck/checker/proxysupport.py +++ b/linkcheck/checker/proxysupport.py @@ -28,18 +28,36 @@ class ProxySupport (object): def set_proxy (self, proxy): """ Parse given proxy information and store parsed values. + Note that only http:// proxies are supported, both for ftp:// + and http:// urls. 
""" self.proxy = proxy self.proxyauth = None - if self.proxy: - if self.proxy[:7].lower() != "http://": - self.proxy = "http://"+self.proxy - self.proxy = urllib.splittype(self.proxy)[1] - self.proxy = urllib.splithost(self.proxy)[0] - self.proxyauth, self.proxy = urllib.splituser(self.proxy) - if self.proxyauth is not None: - if ":" not in self.proxyauth: - self.proxyauth += ":" - import base64 - self.proxyauth = base64.encodestring(self.proxyauth).strip() - self.proxyauth = "Basic "+self.proxyauth + if not self.proxy: + self.add_info(_("Ignoring empty proxy setting for %r") % self.url) + return + if self.proxy[:7].lower() != "http://": + self.proxy = "http://"+self.proxy + self.proxy = urllib.splittype(self.proxy)[1] + self.proxy = urllib.splithost(self.proxy)[0] + self.proxyauth, self.proxy = urllib.splituser(self.proxy) + if self.ignore_proxy_host(): + # log proxy without auth info + self.add_info(_("Ignoring proxy setting %r") % self.proxy) + self.proxy = None + self.proxyauth = None + return + self.add_info(_("Using proxy %r.") % self.proxy) + if self.proxyauth is not None: + if ":" not in self.proxyauth: + self.proxyauth += ":" + import base64 + self.proxyauth = base64.encodestring(self.proxyauth).strip() + self.proxyauth = "Basic "+self.proxyauth + + + def ignore_proxy_host (self): + for ro in self.consumer.config["noproxyfor"]: + if ro.search(self.host): + return True + return False