# -*- coding: iso-8859-1 -*- # Various HTTP utils with a free license from cStringIO import StringIO from . import gzip2 as gzip from . import httplib2 as httplib from . import log, LOG_CHECK, fileutil import re import zlib import urllib import urllib2 ########################################################################### # urlutils.py - Simplified urllib handling # # Written by Chris Lawrence # (C) 1999-2002 Chris Lawrence # # This program is freely distributable per the following license: # ## Permission to use, copy, modify, and distribute this software and its ## documentation for any purpose and without fee is hereby granted, ## provided that the above copyright notice appears in all copies and that ## both that copyright notice and this permission notice appear in ## supporting documentation. ## ## I DISCLAIM ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL ## IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL I ## BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY ## DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, ## WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ## ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS ## SOFTWARE. def decode (page): """Gunzip or deflate a compressed page.""" log.debug(LOG_CHECK, "robots.txt page info %d %s", page.code, str(page.info())) encoding = page.info().get("Content-Encoding") if encoding in ('gzip', 'x-gzip', 'deflate'): # cannot seek in socket descriptors, so must get content now content = page.read() try: if encoding == 'deflate': fp = StringIO(zlib.decompress(content)) else: fp = gzip.GzipFile('', 'rb', 9, StringIO(content)) except zlib.error, msg: log.debug(LOG_CHECK, "uncompressing had error " "%s, assuming non-compressed content", str(msg)) fp = StringIO(content) # remove content-encoding header headers = httplib.HTTPMessage(StringIO("")) ceheader = re.compile(r"(?i)content-encoding:") for h in page.info().keys(): if not ceheader.match(h): headers[h] = page.info()[h] newpage = urllib.addinfourl(fp, headers, page.geturl()) newpage.code = page.code newpage.msg = page.msg return page class HttpWithGzipHandler (urllib2.HTTPHandler): """Support gzip encoding.""" def http_open (self, req): """Send request and decode answer.""" return decode(urllib2.HTTPHandler.http_open(self, req)) if hasattr(httplib, 'HTTPS'): class HttpsWithGzipHandler (urllib2.HTTPSHandler): """Support gzip encoding.""" def http_open (self, req): """Send request and decode answer.""" return decode(urllib2.HTTPSHandler.http_open(self, req)) # end of urlutils.py routines ########################################################################### def encode_multipart_formdata(fields, files=None): """ From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/146306 fields is a sequence of (name, value) elements for regular form fields. files is a sequence of (name, filename, value) elements for data to be uploaded as files. Return (content_type, body) ready for httplib.HTTP instance """ BOUNDARY = '----------ThIs_Is_tHe_bouNdaRY_$' CRLF = '\r\n' L = [] for (key, value) in fields: L.append('--' + BOUNDARY) L.append('Content-Disposition: form-data; name="%s"' % key) L.append('') L.append(value) if files is not None: for (key, filename, value) in files: content_type = fileutil.guess_mimetype(filename) L.append('--' + BOUNDARY) L.append('Content-Disposition: form-data; name="%s"; filename="%s"' % (key, filename)) L.append('Content-Type: %s' % content_type) L.append('') L.append(value) L.append('--' + BOUNDARY + '--') L.append('') body = CRLF.join(L) content_type = 'multipart/form-data; boundary=%s' % BOUNDARY return content_type, body