From 6c38b4165aabd28d33a2ce508344952b19742558 Mon Sep 17 00:00:00 2001
From: Bastian Kleineidam
Date: Mon, 14 Jul 2014 19:50:11 +0200
Subject: [PATCH] Use given HTTP auth data for robots.txt fetching.

---
 doc/changelog.txt | 2 ++
 linkcheck/cache/robots_txt.py | 4 +---
 linkcheck/robotparser2.py | 20 ++++++++++++--------
 3 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/doc/changelog.txt b/doc/changelog.txt
index aeae0270..c84a080d 100644
--- a/doc/changelog.txt
+++ b/doc/changelog.txt
@@ -26,6 +26,8 @@ Fixes:
   Closes: GH bug #521
 - cgi: Sanitize configuration.
   Closes: GH bug #519
+- checking: Use user-supplied authentication when requesting robots.txt
+  since some sites are completely password protected.
 
 
 9.2 "Rick and Morty" (released 23.4.2014)
diff --git a/linkcheck/cache/robots_txt.py b/linkcheck/cache/robots_txt.py
index d8e58590..04ee8cd7 100644
--- a/linkcheck/cache/robots_txt.py
+++ b/linkcheck/cache/robots_txt.py
@@ -51,15 +51,13 @@ class RobotsTxt (object):
     def _allows_url (self, url_data, roboturl):
         """Ask robots.txt allowance.
         Assumes only single thread per robots.txt URL calls this function."""
-        user, password = url_data.get_user_password()
         with cache_lock:
             if roboturl in self.cache:
                 self.hits += 1
                 rp = self.cache[roboturl]
                 return rp.can_fetch(self.useragent, url_data.url)
             self.misses += 1
-        rp = robotparser2.RobotFileParser(proxy=url_data.proxy, user=user,
-                                          password=password)
+        rp = robotparser2.RobotFileParser(proxy=url_data.proxy, auth=url_data.auth)
         rp.set_url(roboturl)
         rp.read()
         with cache_lock:
diff --git a/linkcheck/robotparser2.py b/linkcheck/robotparser2.py
index 9455b835..f5e732db 100644
--- a/linkcheck/robotparser2.py
+++ b/linkcheck/robotparser2.py
@@ -34,13 +34,13 @@ class RobotFileParser (object):
     """This class provides a set of methods to read, parse and answer
     questions about a single robots.txt file."""
 
-    def __init__ (self, url='', proxy=None, user=None, password=None):
+    def __init__ (self, url='', proxy=None, auth=None):
         """Initialize internal entry lists and store given url and
         credentials."""
         self.set_url(url)
         self.proxy = proxy
-        self.user = user
-        self.password = password
+        # XXX proxy
+        self.auth = auth
         self._reset()
 
     def _reset (self):
@@ -77,12 +77,16 @@ class RobotFileParser (object):
     def read (self):
         """Read the robots.txt URL and feeds it to the parser."""
         self._reset()
-        headers = {
-            'User-Agent': configuration.UserAgent,
-            'Accept-Encoding': ACCEPT_ENCODING,
-        }
+        kwargs = dict(
+            headers = {
+                'User-Agent': configuration.UserAgent,
+                'Accept-Encoding': ACCEPT_ENCODING,
+            }
+        )
+        if self.auth:
+            kwargs["auth"] = self.auth
         try:
-            response = requests.get(self.url, headers=headers)
+            response = requests.get(self.url, **kwargs)
             response.raise_for_status()
             content_type = response.headers.get('content-type')
             if content_type and content_type.lower().startswith('text/plain'):