Fix proxy support.

This commit is contained in:
Bastian Kleineidam 2014-09-05 22:48:10 +02:00
parent c580c0ec52
commit 06c6b80ed3
3 changed files with 18 additions and 11 deletions

View file

@ -4,6 +4,10 @@ Features:
- checking: Support itms-services: URLs.
Closes: GH bug #532
Fixes:
- checking: Correct typos in the proxy handling code.
Closes: GH bug #536
9.3 "Better Living Through Chemistry" (released 16.7.2014)

View file

@ -59,7 +59,7 @@ class RobotsTxt (object):
self.misses += 1
kwargs = dict(auth=url_data.auth, session=url_data.session)
if url_data.proxy:
kwargs["proxies"] = {url_data.proxytype, url_data.proxy}
kwargs["proxies"] = {url_data.proxytype: url_data.proxy}
rp = robotparser2.RobotFileParser(**kwargs)
rp.set_url(roboturl)
rp.read()

View file

@ -18,6 +18,7 @@
Mixin class for URLs that can be fetched over a proxy.
"""
import urllib
import urlparse
import os
from .. import LinkCheckerError, log, LOG_CHECK, url as urlutil, httputil
@ -35,29 +36,30 @@ class ProxySupport (object):
self.proxyauth = None
if not self.proxy:
return
self.proxytype, self.proxy = urllib.splittype(self.proxy)
proxyurl = urlparse.urlparse(self.proxy)
self.proxytype = proxyurl.scheme
if self.proxytype not in ('http', 'https'):
# Note that invalid proxies might raise TypeError in urllib2,
# so make sure to stop checking at this point, not later.
msg = _("Proxy value `%(proxy)s' must start with 'http:' or 'https:'.") \
% dict(proxy=proxy)
raise LinkCheckerError(msg)
self.proxy = urllib.splithost(self.proxy)[0]
self.proxyauth, self.proxy = urllib.splituser(self.proxy)
if self.ignore_proxy_host():
# log proxy without auth info
log.debug(LOG_CHECK, "ignoring proxy %r", self.proxy)
self.add_info(_("Ignoring proxy setting `%(proxy)s'.") %
dict(proxy=proxy))
self.proxy = self.proxyauth = None
self.proxy = None
return
log.debug(LOG_CHECK, "using proxy %r", self.proxy)
self.add_info(_("Using proxy `%(proxy)s'.") % dict(proxy=self.proxy))
if self.proxyauth is not None:
if ":" not in self.proxyauth:
self.proxyauth += ":"
self.proxyauth = httputil.encode_base64(self.proxyauth)
self.proxyauth = "Basic "+self.proxyauth
self.proxyhost = proxyurl.hostname
self.proxyport = proxyurl.port
if proxyurl.username is not None:
username = proxyurl.username
password = proxyurl.password if proxy.password is not None else ""
auth = "%s:%s" % (username, password)
self.proxyauth = "Basic "+httputil.encode_base64(auth)
def ignore_proxy_host (self):
"""Check if self.host is in the $no_proxy ignore list."""
@ -79,7 +81,8 @@ class ProxySupport (object):
"""
if self.proxy:
scheme = self.proxytype
host, port = urlutil.splitport(self.proxy)
host = self.proxyhost
port = self.proxyport
else:
scheme = self.scheme
host = self.host