From 255b698f448f7c792c9c19fcca40c245901b6723 Mon Sep 17 00:00:00 2001 From: calvin Date: Wed, 19 Jan 2005 14:52:27 +0000 Subject: [PATCH] updated to latest upstream CVS version of 20050119 git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@2162 e7d03fd6-7b0d-0410-9947-9c21f3af8025 --- linkcheck/httplib2.py | 150 ++++++++++++++++++++++++++++++++++-------- 1 file changed, 122 insertions(+), 28 deletions(-) diff --git a/linkcheck/httplib2.py b/linkcheck/httplib2.py index 38165bee..69077313 100644 --- a/linkcheck/httplib2.py +++ b/linkcheck/httplib2.py @@ -93,6 +93,66 @@ _CS_IDLE = 'Idle' _CS_REQ_STARTED = 'Request-started' _CS_REQ_SENT = 'Request-sent' +# status codes +# informational +CONTINUE = 100 +SWITCHING_PROTOCOLS = 101 +PROCESSING = 102 + +# successful +OK = 200 +CREATED = 201 +ACCEPTED = 202 +NON_AUTHORITATIVE_INFORMATION = 203 +NO_CONTENT = 204 +RESET_CONTENT = 205 +PARTIAL_CONTENT = 206 +MULTI_STATUS = 207 +IM_USED = 226 + +# redirection +MULTIPLE_CHOICES = 300 +MOVED_PERMANENTLY = 301 +FOUND = 302 +SEE_OTHER = 303 +NOT_MODIFIED = 304 +USE_PROXY = 305 +TEMPORARY_REDIRECT = 307 + +# client error +BAD_REQUEST = 400 +UNAUTHORIZED = 401 +PAYMENT_REQUIRED = 402 +FORBIDDEN = 403 +NOT_FOUND = 404 +METHOD_NOT_ALLOWED = 405 +NOT_ACCEPTABLE = 406 +PROXY_AUTHENTICATION_REQUIRED = 407 +REQUEST_TIMEOUT = 408 +CONFLICT = 409 +GONE = 410 +LENGTH_REQUIRED = 411 +PRECONDITION_FAILED = 412 +REQUEST_ENTITY_TOO_LARGE = 413 +REQUEST_URI_TOO_LONG = 414 +UNSUPPORTED_MEDIA_TYPE = 415 +REQUESTED_RANGE_NOT_SATISFIABLE = 416 +EXPECTATION_FAILED = 417 +UNPROCESSABLE_ENTITY = 422 +LOCKED = 423 +FAILED_DEPENDENCY = 424 +UPGRADE_REQUIRED = 426 + +# server error +INTERNAL_SERVER_ERROR = 500 +NOT_IMPLEMENTED = 501 +BAD_GATEWAY = 502 +SERVICE_UNAVAILABLE = 503 +GATEWAY_TIMEOUT = 504 +HTTP_VERSION_NOT_SUPPORTED = 505 +INSUFFICIENT_STORAGE = 507 +NOT_EXTENDED = 510 + class HTTPMessage(mimetools.Message): def addheader(self, key, value): @@ -271,7 +331,7 @@ class HTTPResponse: # read until we get a non-100 response while True: version, status, reason = self._read_status() - if status != 100: + if status != CONTINUE: break # skip the header from the 100 response while True: @@ -329,8 +389,7 @@ class HTTPResponse: self.length = None # does the body have a fixed length? (of zero) - if (status == 204 or # No Content - status == 304 or # Not Modified + if (status == NO_CONTENT or status == NOT_MODIFIED or 100 <= status < 200 or # 1xx codes self._method == 'HEAD'): self.length = 0 @@ -344,24 +403,30 @@ class HTTPResponse: self.will_close = 1 def _check_close(self): + conn = self.msg.getheader('connection') if self.version == 11: # An HTTP/1.1 proxy is assumed to stay open unless # explicitly closed. conn = self.msg.getheader('connection') - if conn and conn.lower().find("close") >= 0: + if conn and "close" in conn.lower(): return True return False - # An HTTP/1.0 response with a Connection header is probably - # the result of a confused proxy. Ignore it. + # Some HTTP/1.0 implementations have support for persistent + # connections, using rules different than HTTP/1.1. # For older HTTP, Keep-Alive indiciates persistent connection. if self.msg.getheader('keep-alive'): return False + # At least Akamai returns a "Connection: Keep-Alive" header, + # which was supposed to be sent by the client. + if conn and "keep-alive" in conn.lower(): + return False + # Proxy-Connection is a netscape hack. pconn = self.msg.getheader('proxy-connection') - if pconn and pconn.lower().find("keep-alive") >= 0: + if pconn and "keep-alive" in pconn.lower(): return False # otherwise, assume it will close @@ -381,6 +446,8 @@ class HTTPResponse: # called, meaning self.isclosed() is meaningful. return self.fp is None + # XXX It would be nice to have readline and __iter__ for this, too. + def read(self, amt=None): if self.fp is None: return '' @@ -390,10 +457,11 @@ class HTTPResponse: if amt is None: # unbounded read - if self.will_close: + if self.length is None: s = self.fp.read() else: s = self._safe_read(self.length) + self.length = 0 self.close() # we read everything return s @@ -401,12 +469,13 @@ class HTTPResponse: if amt > self.length: # clip the read to the "end of response" amt = self.length - self.length -= amt # we do not use _safe_read() here because this may be a .will_close # connection, and the user is reading more bytes than will be provided # (for example, reading in 1k chunks) s = self.fp.read(amt) + if self.length is not None: + self.length -= len(s) return s @@ -426,7 +495,7 @@ class HTTPResponse: try: chunk_left = int(line, 16) except ValueError, msg: - raise IncompleteRead("Invalid chunk length at %r"%line) + raise IncompleteRead("Invalid chunk length at %r" % line) if chunk_left == 0: break if amt is None: @@ -488,6 +557,12 @@ class HTTPResponse: raise ResponseNotReady() return self.msg.getheader(name, default) + def getheaders(self): + """Return list of (header, value) tuples.""" + if self.msg is None: + raise ResponseNotReady() + return self.msg.items() + class HTTPConnection: @@ -513,8 +588,9 @@ class HTTPConnection: def _set_hostport(self, host, port): if port is None: - i = host.find(':') - if i >= 0: + i = host.rfind(':') + j = host.rfind(']') # ipv6 addresses have [...] + if i > j: try: port = int(host[i+1:]) except ValueError: @@ -522,6 +598,8 @@ class HTTPConnection: host = host[:i] else: port = self.default_port + if host and host[0] == '[' and host[-1] == ']': + host = host[1:-1] self.host = host self.port = port @@ -599,18 +677,21 @@ class HTTPConnection: del self._buffer[:] self.send(msg) - def putrequest(self, method, url, skip_host=0): + def putrequest(self, method, url, skip_host=0, skip_accept_encoding=0): """Send a request to the server. `method' specifies an HTTP request method, e.g. 'GET'. `url' specifies the object being requested, e.g. '/index.html'. + `skip_host' if True does not add automatically a 'Host:' header + `skip_accept_encoding' if True does not add automatically an + 'Accept-Encoding:' header """ # if a prior response has been completed, then forget about it. if self.__response and self.__response.isclosed(): self.__response = None - # + # in certain cases, we cannot issue another request on this connection. # this occurs when: # 1) we are in the process of sending a request. (_CS_REQ_STARTED) @@ -669,8 +750,7 @@ class HTTPConnection: elif self.port == HTTP_PORT: self.putheader('Host', self.host.encode("idna")) else: - self.putheader('Host', "%s:%s" % \ - (self.host.encode("idna"), self.port)) + self.putheader('Host', "%s:%s" % (self.host.encode("idna"), self.port)) # note: we are assuming that clients will not attempt to set these # headers since *this* library must deal with the @@ -678,6 +758,11 @@ class HTTPConnection: # libraries are updated to recognize other forms, then this # code should be changed (removed or updated). + # we only want a Content-Encoding of "identity" since we don't + # support encodings such as x-gzip or x-deflate. + if not skip_accept_encoding: + self.putheader('Accept-Encoding', 'identity') + # we can accept "chunked" Transfer-Encodings, but no others # NOTE: no TE header implies *only* "chunked" #self.putheader('TE', 'chunked') @@ -724,15 +809,17 @@ class HTTPConnection: self._send_request(method, url, body, headers) def _send_request(self, method, url, body, headers): - # If headers already contains a host header, then define the - # optional skip_host argument to putrequest(). The check is - # more delicate because field names are case insensitive. - if 'host' in [k.lower() for k in headers]: - self.putrequest(method, url, skip_host=1) - else: - self.putrequest(method, url) + # honour explicitly requested Host: and Accept-Encoding headers + header_names = dict.fromkeys([k.lower() for k in headers]) + skips = {} + if 'host' in header_names: + skips['skip_host'] = 1 + if 'accept-encoding' in header_names: + skips['skip_accept_encoding'] = 1 - if body: + self.putrequest(method, url, **skips) + + if body and ('content-length' not in header_names): self.putheader('Content-Length', str(len(body))) for hdr, value in headers.iteritems(): self.putheader(hdr, value) @@ -1177,7 +1264,9 @@ class LineAndFileWrapper: self.readlines = self._file.readlines def read(self, amt=None): - assert not self._line_consumed and self._line_left + if self._line_consumed: + return self._file.read(amt) + assert self._line_left if amt is None or amt > self._line_left: s = self._line[self._line_offset:] self._done() @@ -1197,11 +1286,17 @@ class LineAndFileWrapper: return s def readline(self): + if self._line_consumed: + return self._file.readline() + assert self._line_left s = self._line[self._line_offset:] self._done() return s def readlines(self, size=None): + if self._line_consumed: + return self._file.readlines(size) + assert self._line_left L = [self._line[self._line_offset:]] self._done() if size is None: @@ -1209,7 +1304,7 @@ class LineAndFileWrapper: else: return L + self._file.readlines(size) -def test (): +def test(): """Test this module. A hodge podge of tests collected here, because they have too many @@ -1221,8 +1316,7 @@ def test (): opts, args = getopt.getopt(sys.argv[1:], 'd') dl = 0 for o, a in opts: - if o == '-d': - dl = dl + 1 + if o == '-d': dl = dl + 1 host = 'www.python.org' selector = '/' if args[0:]: host = args[0]