Move http util functions into a separate module.

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@3747 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2008-05-08 10:36:08 +00:00
parent d5ec7c1ac3
commit bc9b9ee07e
2 changed files with 125 additions and 84 deletions

121
linkcheck/httputil.py Normal file
View file

@ -0,0 +1,121 @@
# -*- coding: iso-8859-1 -*-
# Various HTTP utils with a free license
from cStringIO import StringIO
from . import gzip2 as gzip
from . import httplib2 as httplib
from . import log, LOG_CHECK
import re
import mimetypes
import zlib
import urllib
import urllib2
###########################################################################
# urlutils.py - Simplified urllib handling
#
# Written by Chris Lawrence <lawrencc@debian.org>
# (C) 1999-2002 Chris Lawrence
#
# This program is freely distributable per the following license:
#
## Permission to use, copy, modify, and distribute this software and its
## documentation for any purpose and without fee is hereby granted,
## provided that the above copyright notice appears in all copies and that
## both that copyright notice and this permission notice appear in
## supporting documentation.
##
## I DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
## IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL I
## BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
## DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
## WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
## ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
## SOFTWARE.
def decode (page):
    """Gunzip or deflate a compressed page.

    page is a urllib2/urllib response object (file-like, with info(),
    geturl() and usually code/msg attributes).  If the response carries a
    Content-Encoding of gzip, x-gzip or deflate, return a new page object
    whose read() yields the decompressed body and whose headers no longer
    contain Content-Encoding; otherwise return page unchanged.
    """
    log.debug(LOG_CHECK,
        "robots.txt page info %d %s", page.code, str(page.info()))
    encoding = page.info().get("Content-Encoding")
    if encoding in ('gzip', 'x-gzip', 'deflate'):
        # cannot seek in socket descriptors, so must get content now
        content = page.read()
        try:
            if encoding == 'deflate':
                fp = StringIO(zlib.decompress(content))
            else:
                # NOTE(review): GzipFile decompresses lazily on read(), so a
                # corrupt gzip body may raise later, outside this try — confirm.
                fp = gzip.GzipFile('', 'rb', 9, StringIO(content))
        except zlib.error, msg:
            # Some servers mislabel uncompressed data; fall back to the
            # raw bytes instead of failing.
            log.debug(LOG_CHECK, "uncompressing had error "
                "%s, assuming non-compressed content", str(msg))
            fp = StringIO(content)
        # remove content-encoding header: the body handed to the caller is
        # already decompressed, so the header would be a lie
        headers = httplib.HTTPMessage(StringIO(""))
        ceheader = re.compile(r"(?i)content-encoding:")
        for h in page.info().keys():
            if not ceheader.match(h):
                headers[h] = page.info()[h]
        # rewrap the decompressed stream with the filtered headers and the
        # original URL
        newpage = urllib.addinfourl(fp, headers, page.geturl())
        if hasattr(page, "code"):
            # python 2.4 compatibility
            newpage.code = page.code
        if hasattr(page, "msg"):
            # python 2.4 compatibility
            newpage.msg = page.msg
        page = newpage
    return page
class HttpWithGzipHandler (urllib2.HTTPHandler):
    """urllib2 HTTP handler that transparently decompresses
    gzip/deflate-encoded responses via decode()."""
    def http_open (self, req):
        """Send request and decode answer."""
        return decode(urllib2.HTTPHandler.http_open(self, req))
# NOTE(review): httplib here is the bundled httplib2 module; presumably it
# only defines HTTPS when Python has SSL support — confirm before relying
# on HttpsWithGzipHandler existing.
if hasattr(httplib, 'HTTPS'):
    class HttpsWithGzipHandler (urllib2.HTTPSHandler):
        """urllib2 HTTPS handler that transparently decompresses
        gzip/deflate-encoded responses via decode()."""
        def http_open (self, req):
            """Send request and decode answer."""
            return decode(urllib2.HTTPSHandler.http_open(self, req))
# end of urlutils.py routines
###########################################################################
def encode_multipart_formdata(fields, files=None):
    """
    From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/146306

    fields is a sequence of (name, value) elements for regular form fields.
    files is a sequence of (name, filename, value) elements for data to be
    uploaded as files.
    Return (content_type, body) ready for httplib.HTTP instance
    """
    BOUNDARY = '----------ThIs_Is_tHe_bouNdaRY_$'
    CRLF = '\r\n'
    parts = []
    # Plain form fields: one part each, no filename or content type.
    for (name, value) in fields:
        parts.extend((
            '--' + BOUNDARY,
            'Content-Disposition: form-data; name="%s"' % name,
            '',
            value,
        ))
    # File uploads: also carry a filename and a guessed content type.
    for (name, filename, value) in (files if files is not None else ()):
        parts.extend((
            '--' + BOUNDARY,
            'Content-Disposition: form-data; name="%s"; filename="%s"' % (name, filename),
            'Content-Type: %s' % get_content_type(filename),
            '',
            value,
        ))
    # Closing boundary, plus a trailing empty element so the body ends in CRLF.
    parts.extend(('--' + BOUNDARY + '--', ''))
    content_type = 'multipart/form-data; boundary=%s' % BOUNDARY
    return content_type, CRLF.join(parts)
def get_content_type(filename):
    """Guess the MIME type from filename, falling back to the generic
    application/octet-stream when it cannot be determined."""
    guessed = mimetypes.guess_type(filename)[0]
    return guessed or 'application/octet-stream'

View file

@ -20,21 +20,14 @@ Robots.txt parser.
The robots.txt Exclusion Protocol is implemented as specified in
http://www.robotstxt.org/wc/norobots-rfc.html
"""
import urlparse
import httplib
import urllib
import urllib2
import time
import socket
import re
import zlib
import sys
import cStringIO as StringIO
import linkcheck
from . import configuration
from . import log, LOG_CHECK
from . import gzip2 as gzip
from . import httplib2 as httplib
from . import log, LOG_CHECK, configuration, httputil
__all__ = ["RobotFileParser"]
@ -116,7 +109,7 @@ class RobotFileParser (object):
handlers = [
urllib2.ProxyHandler(urllib.getproxies()),
urllib2.UnknownHandler,
HttpWithGzipHandler,
httputil.HttpWithGzipHandler,
urllib2.HTTPBasicAuthHandler(pwd_manager),
urllib2.ProxyBasicAuthHandler(pwd_manager),
urllib2.HTTPDigestAuthHandler(pwd_manager),
@ -125,7 +118,7 @@ class RobotFileParser (object):
urllib2.HTTPRedirectHandler,
]
if hasattr(httplib, 'HTTPS'):
handlers.append(HttpsWithGzipHandler)
handlers.append(httputil.HttpsWithGzipHandler)
return urllib2.build_opener(*handlers)
def read (self):
@ -423,76 +416,3 @@ class Entry (object):
if line.applies_to(path):
return line.allowance
return True
###########################################################################
# urlutils.py - Simplified urllib handling
#
# Written by Chris Lawrence <lawrencc@debian.org>
# (C) 1999-2002 Chris Lawrence
#
# This program is freely distributable per the following license:
#
## Permission to use, copy, modify, and distribute this software and its
## documentation for any purpose and without fee is hereby granted,
## provided that the above copyright notice appears in all copies and that
## both that copyright notice and this permission notice appear in
## supporting documentation.
##
## I DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL
## IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL I
## BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY
## DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
## WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION,
## ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS
## SOFTWARE.
def decode (page):
    """Gunzip or deflate a compressed page.

    page is a urllib2/urllib response object.  If the response declares a
    Content-Encoding of gzip, x-gzip or deflate, return a replacement page
    whose read() yields the decompressed body and whose headers omit
    Content-Encoding; otherwise return page unchanged.
    """
    log.debug(LOG_CHECK,
        "robots.txt page info %d %s", page.code, str(page.info()))
    encoding = page.info().get("Content-Encoding")
    if encoding in ('gzip', 'x-gzip', 'deflate'):
        # cannot seek in socket descriptors, so must get content now
        content = page.read()
        try:
            if encoding == 'deflate':
                fp = StringIO.StringIO(zlib.decompress(content))
            else:
                # NOTE(review): GzipFile decompresses lazily on read(); a
                # corrupt body may raise outside this try — confirm.
                fp = gzip.GzipFile('', 'rb', 9, StringIO.StringIO(content))
        except zlib.error, msg:
            # Some servers mislabel uncompressed data; use the raw bytes.
            log.debug(LOG_CHECK, "uncompressing had error "
                "%s, assuming non-compressed content", str(msg))
            fp = StringIO.StringIO(content)
        # remove content-encoding header: the returned body is already
        # decompressed, so keeping the header would mislead callers
        headers = httplib.HTTPMessage(StringIO.StringIO(""))
        ceheader = re.compile(r"(?i)content-encoding:")
        for h in page.info().keys():
            if not ceheader.match(h):
                headers[h] = page.info()[h]
        # rewrap the stream with filtered headers and the original URL
        newpage = urllib.addinfourl(fp, headers, page.geturl())
        if hasattr(page, "code"):
            # python 2.4 compatibility
            newpage.code = page.code
        if hasattr(page, "msg"):
            # python 2.4 compatibility
            newpage.msg = page.msg
        page = newpage
    return page
class HttpWithGzipHandler (urllib2.HTTPHandler):
    """urllib2 HTTP handler that transparently decompresses
    gzip/deflate-encoded responses via decode()."""
    def http_open (self, req):
        """Send request and decode answer."""
        return decode(urllib2.HTTPHandler.http_open(self, req))
# NOTE(review): httplib is the bundled httplib2 module; presumably HTTPS is
# only defined when SSL support is available — confirm before relying on
# HttpsWithGzipHandler existing.
if hasattr(httplib, 'HTTPS'):
    class HttpsWithGzipHandler (urllib2.HTTPSHandler):
        """urllib2 HTTPS handler that transparently decompresses
        gzip/deflate-encoded responses via decode()."""
        def http_open (self, req):
            """Send request and decode answer."""
            return decode(urllib2.HTTPSHandler.http_open(self, req))
# end of urlutils.py routines
###########################################################################