Use HTTPMessage() in all urllib handlers, properly fixing the bug noted in http://www.python.org/sf/1117588. The previous workaround has been removed.

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@2603 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2005-05-18 17:53:39 +00:00
parent cb54822dac
commit 63b76ec642

View file

@ -54,48 +54,6 @@ debug = lambda txt: _msg("debug:", txt)
# PEP 8: a lambda bound to a name should be a def -- it gains a real
# __name__ for tracebacks and a place for a docstring.
def warn (txt):
    """Log txt as a warning message through the module _msg helper."""
    return _msg("warning:", txt)

def error (txt):
    """Log txt as an error message through the module _msg helper."""
    return _msg("error:", txt)
class MyHTTPRedirectHandler (urllib2.HTTPRedirectHandler):
    """
    Work around for bug http://www.python.org/sf/1117588

    Same redirect logic as urllib2.HTTPRedirectHandler, but reimplemented
    here so the headers object handling can be controlled by this module.
    """

    def http_error_302 (self, req, fp, code, msg, headers):
        """Handle a redirect response, following the Location/URI header.

        @param req: the original request object
        @param fp: file-like object with the response body
        @param code: HTTP status code (301/302/303/307)
        @param msg: HTTP status message
        @param headers: response headers
        @return: the response of opening the redirected request, or None
            if no redirect target was given or the redirect was refused
        @raise urllib2.HTTPError: if a redirect loop or too many
            redirections are detected
        """
        # Prefer the standard Location header; some servers send URI.
        if 'location' in headers:
            newurl = headers.get('location')
        elif 'uri' in headers:
            newurl = headers.get('uri')
        else:
            return
        newurl = urlparse.urljoin(req.get_full_url(), newurl)
        # XXX Probably want to forget about the state of the current
        # request, although that might interact poorly with other
        # handlers that also use handler-specific request attributes
        new = self.redirect_request(req, fp, code, msg, headers, newurl)
        if new is None:
            return
        # loop detection
        # .redirect_dict has a key url if url was previously visited.
        if hasattr(req, 'redirect_dict'):
            visited = new.redirect_dict = req.redirect_dict
            if (visited.get(newurl, 0) >= self.max_repeats or
                len(visited) >= self.max_redirections):
                # Bugfix: HTTPError is not imported into this module's
                # namespace; it must be referenced through urllib2.
                raise urllib2.HTTPError(req.get_full_url(), code,
                                        self.inf_msg + msg, headers, fp)
        else:
            visited = new.redirect_dict = req.redirect_dict = {}
        visited[newurl] = visited.get(newurl, 0) + 1
        # Don't close the fp until we are sure that we won't use it
        # with HTTPError.
        fp.read()
        fp.close()
        return self.parent.open(new)

    # All redirect status codes share the same handling.
    http_error_301 = http_error_303 = http_error_307 = http_error_302
class PasswordManager (object):
"""
@ -205,7 +163,7 @@ class RobotFileParser (object):
urllib2.HTTPDigestAuthHandler(pwd_manager),
urllib2.ProxyDigestAuthHandler(pwd_manager),
urllib2.HTTPDefaultErrorHandler,
MyHTTPRedirectHandler,
urllib2.HTTPRedirectHandler,
]
if hasattr(linkcheck.httplib2, 'HTTPS'):
handlers.append(HttpsWithGzipHandler)
@ -514,7 +472,7 @@ def decode (page):
"%s, assuming non-compressed content" % str(msg))
fp = StringIO.StringIO(content)
# remove content-encoding header
headers = {}
headers = httplib.HTTPMessage(StringIO.StringIO(""))
ceheader = re.compile(r"(?i)content-encoding:")
for h in page.info().keys():
if not ceheader.match(h):