mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-17 06:20:27 +00:00
Readd
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@3867 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
2758de9757
commit
c3b6fc5aa4
5 changed files with 32 additions and 9 deletions
|
|
@@ -75,6 +75,18 @@
|
|||
Type: feature
|
||||
Changed: linkcheck/checker/fileurl.py
|
||||
|
||||
* Handle non-Latin1 filenames when checking local directories.
|
||||
Type: bugfix
|
||||
Closes: SF bug #2093225
|
||||
Changed: linkcheck/checker/fileurl.py
|
||||
|
||||
* Use configured proxy when requesting robots.txt, especially
|
||||
honor the noproxy values.
|
||||
Type: bugfix
|
||||
Closes: SF bug #2091297
|
||||
Changed: linkcheck/robotparser2.py, linkcheck/cache/robots_txt.py,
|
||||
linkcheck/checker/httpurl.py
|
||||
|
||||
4.9 "Michael Clayton" (released 25.4.2008)
|
||||
|
||||
* Parse Shockwave Flash (SWF) for URLs to check
|
||||
|
|
|
|||
5
linkcheck/cache/robots_txt.py
vendored
5
linkcheck/cache/robots_txt.py
vendored
|
|
@@ -36,12 +36,13 @@ class RobotsTxt (object):
|
|||
self.cache = {}
|
||||
|
||||
@synchronized(_lock)
|
||||
def allows_url (self, roboturl, url, user, password, callback=None):
|
||||
def allows_url (self, roboturl, url, proxy, user, password, callback=None):
|
||||
"""
|
||||
Ask robots.txt allowance.
|
||||
"""
|
||||
if roboturl not in self.cache:
|
||||
rp = robotparser2.RobotFileParser(user=user, password=password)
|
||||
rp = robotparser2.RobotFileParser(proxy=proxy, user=user,
|
||||
password=password)
|
||||
rp.set_url(roboturl)
|
||||
rp.read()
|
||||
if callback is not None:
|
||||
|
|
|
|||
|
|
@@ -186,7 +186,9 @@ class FileUrl (urlbase.UrlBase):
|
|||
t = time.time()
|
||||
files = get_files(self.get_os_filename())
|
||||
data = get_index_html(files)
|
||||
self.data = data.encode("iso8859-1", "ignore")
|
||||
if isinstance(data, unicode):
|
||||
data = data.encode("iso8859-1", "ignore")
|
||||
self.data = data
|
||||
self.dltime = time.time() - t
|
||||
self.dlsize = len(self.data)
|
||||
return self.data
|
||||
|
|
|
|||
|
|
@@ -139,7 +139,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
user, password = self.get_user_password()
|
||||
rb = self.aggregate.robots_txt
|
||||
callback = self.aggregate.connections.host_wait
|
||||
return rb.allows_url(roboturl, url, user, password, callback=callback)
|
||||
return rb.allows_url(roboturl, url, self.proxy, user, password,
|
||||
callback=callback,)
|
||||
|
||||
def check_connection (self):
|
||||
"""
|
||||
|
|
|
|||
|
|
@@ -61,10 +61,11 @@ class RobotFileParser (object):
|
|||
"""This class provides a set of methods to read, parse and answer
|
||||
questions about a single robots.txt file."""
|
||||
|
||||
def __init__ (self, url='', user=None, password=None):
|
||||
def __init__ (self, url='', proxy=None, user=None, password=None):
|
||||
"""Initialize internal entry lists and store given url and
|
||||
credentials."""
|
||||
self.set_url(url)
|
||||
self.proxy = proxy
|
||||
self.user = user
|
||||
self.password = password
|
||||
self._reset()
|
||||
|
|
@@ -107,16 +108,22 @@ class RobotFileParser (object):
|
|||
"""
|
||||
pwd_manager = PasswordManager(self.user, self.password)
|
||||
handlers = [
|
||||
urllib2.ProxyHandler(urllib.getproxies()),
|
||||
urllib2.UnknownHandler,
|
||||
httputil.HttpWithGzipHandler,
|
||||
urllib2.HTTPBasicAuthHandler(pwd_manager),
|
||||
urllib2.ProxyBasicAuthHandler(pwd_manager),
|
||||
urllib2.HTTPDigestAuthHandler(pwd_manager),
|
||||
urllib2.ProxyDigestAuthHandler(pwd_manager),
|
||||
]
|
||||
if self.proxy:
|
||||
handlers.insert(0,
|
||||
urllib2.ProxyHandler({"http": self.proxy, "https": self.proxy}))
|
||||
handlers.extend([
|
||||
urllib2.ProxyBasicAuthHandler(pwd_manager),
|
||||
urllib2.ProxyDigestAuthHandler(pwd_manager),
|
||||
])
|
||||
handlers.extend([
|
||||
urllib2.HTTPDefaultErrorHandler,
|
||||
urllib2.HTTPRedirectHandler,
|
||||
]
|
||||
])
|
||||
if hasattr(httplib, 'HTTPS'):
|
||||
handlers.append(httputil.HttpsWithGzipHandler)
|
||||
return urllib2.build_opener(*handlers)
|
||||
|
|
|
|||
Loading…
Reference in a new issue