linkchecker/linkcheck/http11lib.py
2000-12-14 22:53:48 +00:00

394 lines
12 KiB
Python

#
# HTTP/1.1 client library
#
# Copyright (C) 1998-1999 Guido van Rossum. All Rights Reserved.
# Written by Greg Stein. Given to Guido. Licensed using the Python license.
#
# This module is maintained by Greg and is available at:
# http://www.lyra.org/greg/python/httplib.py
#
# Since this isn't in the Python distribution yet, we'll use the CVS ID
# for tracking:
# $Id$
#
# Modified by Bastian Kleineidam to squish a bug.
import socket,string,mimetools,httplib
# String-style exception raised by this module for protocol-level problems
# (unknown HTTP version, unknown transfer-encoding, unfinished prior response).
error = __name__ + '.error'
# Default TCP port, used when neither an explicit port nor a "host:port"
# suffix is supplied.
HTTP_PORT = 80
class HTTPResponse(mimetools.Message):
def __init__(self, fp, version, errcode):
mimetools.Message.__init__(self, fp, 0)
if version == 'HTTP/1.0':
self.version = 10
elif version[:7] == 'HTTP/1.':
self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
else:
raise error, 'unknown HTTP protocol'
# are we using the chunked-style of transfer encoding?
tr_enc = self.getheader('transfer-encoding')
if tr_enc:
if string.lower(tr_enc) != 'chunked':
raise error, 'unknown transfer-encoding'
self.chunked = 1
self.chunk_left = None
else:
self.chunked = 0
# will the connection close at the end of the response?
conn = self.getheader('connection')
if conn:
conn = string.lower(conn)
# a "Connection: close" will always close the connection. if we
# don't see that and this is not HTTP/1.1, then the connection will
# close unless we see a Keep-Alive header.
self.will_close = string.find(conn, 'close') != -1 or \
( self.version != 11 and \
not self.getheader('keep-alive') )
else:
# for HTTP/1.1, the connection will always remain open
# otherwise, it will remain open IFF we see a Keep-Alive header
self.will_close = self.version != 11 and \
not self.getheader('keep-alive')
# do we have a Content-Length?
# NOTE: RFC 2616, S4.4, #3 states we ignore this if tr_enc is "chunked"
length = self.getheader('content-length')
if length and not self.chunked:
self.length = int(length)
else:
self.length = None
# does the body have a fixed length? (of zero)
if (errcode == 204 or # No Content
errcode == 304 or # Not Modified
100 <= errcode < 200): # 1xx codes
self.length = 0
# if the connection remains open, and we aren't using chunked, and
# a content-length was not provided, then assume that the connection
# WILL close.
if not self.will_close and \
not self.chunked and \
self.length is None:
self.will_close = 1
def close(self):
if self.fp:
self.fp.close()
self.fp = None
def isclosed(self):
# NOTE: it is possible that we will not ever call self.close(). This
# case occurs when will_close is TRUE, length is None, and we
# read up to the last byte, but NOT past it.
#
# IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
# called, meaning self.isclosed() is meaningful.
return self.fp is None
def read(self, amt=None):
if not self.fp:
return ''
if self.chunked:
chunk_left = self.chunk_left
value = ''
while 1:
if not chunk_left:
line = self.fp.readline()
i = string.find(line, ';')
if i >= 0:
line = line[:i] # strip chunk-extensions
chunk_left = string.atoi(line, 16)
if chunk_left == 0:
break
if not amt:
value = value + self.fp.read(chunk_left)
elif amt < chunk_left:
value = value + self.fp.read(amt)
self.chunk_left = chunk_left - amt
return value
elif amt == chunk_left:
value = value + self.fp.read(amt)
self.fp.read(2) # toss the CRLF at the end of the chunk
self.chunk_left = None
return value
else:
value = value + self.fp.read(chunk_left)
amt = amt - chunk_left
# we read the whole chunk, get another
self.fp.read(2) # toss the CRLF at the end of the chunk
chunk_left = None
# read and discard trailer up to the CRLF terminator
### note: we shouldn't have any trailers!
while 1:
line = self.fp.readline()
if line == '\r\n':
break
# we read everything; close the "file"
self.close()
return value
elif not amt:
# unbounded read
if self.will_close:
s = self.fp.read()
else:
s = self.fp.read(self.length)
self.close() # we read everything
return s
if self.length is not None:
if amt > self.length:
# clip the read to the "end of response"
amt = self.length
self.length = self.length - amt
s = self.fp.read(amt)
# close our "file" if we know we should
### I'm not sure about the len(s) < amt part; we should be safe because
### we shouldn't be using non-blocking sockets
if self.length == 0 or len(s) < amt:
self.close()
return s
class HTTPConnection:
_http_vsn = 11
_http_vsn_str = 'HTTP/1.1'
response_class = HTTPResponse
def __init__(self, host, port=None):
self.sock = None
self.response = None
self._set_hostport(host, port)
def _set_hostport(self, host, port):
if port is None:
i = string.find(host, ':')
if i >= 0:
port = int(host[i+1:])
host = host[:i]
else:
port = HTTP_PORT
self.host = host
self.port = port
def connect(self):
"""Connect to the host and port specified in __init__."""
self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
self.sock.connect((self.host, self.port))
def close(self):
"""Close the connection to the HTTP server."""
if self.sock:
self.sock.close() # close it manually... there may be other refs
self.sock = None
if self.response:
self.response.close()
self.response = None
def send(self, str):
"""Send `str' to the server."""
if not self.sock:
self.connect()
# send the data to the server. if we get a broken pipe, then close
# the socket. we want to reconnect when somebody tries to send again.
#
# NOTE: we DO propagate the error, though, because we cannot simply
# ignore the error... the caller will know if they can retry.
try:
self.sock.send(str)
except socket.error, v:
if v[0] == 32: # Broken pipe
self.close()
raise
def putrequest(self, method, url='/'):
"""Send a request to the server.
`method' specifies an HTTP request method, e.g. 'GET'.
`url' specifies the object being requested, e.g.
'/index.html'.
"""
if self.response:
if not self.response.isclosed():
### implies half-duplex!
raise error, 'prior response has not been fully handled'
self.response = None
if not url:
url = '/'
str = '%s %s %s\r\n' % (method, url, self._http_vsn_str)
try:
self.send(str)
except socket.error, v:
if v[0] != 32: # Broken pipe
raise
# try one more time (the socket was closed; this will reopen)
self.send(str)
#self.putheader('Host', self.host)
if self._http_vsn == 11:
# Issue some standard headers for better HTTP/1.1 compliance
# note: we are assuming that clients will not attempt to set these
# headers since *this* library must deal with the consequences.
# this also means that when the supporting libraries are
# updated to recognize other forms, then this code should be
# changed (removed or updated).
# we only want a Content-Encoding of "identity" since we don't
# support encodings such as x-gzip or x-deflate.
self.putheader('Accept-Encoding', 'identity')
# we can accept "chunked" Transfer-Encodings, but no others
# NOTE: no TE header implies *only* "chunked"
#self.putheader('TE', 'chunked')
# if TE is supplied in the header, then it must appear in a
# Connection header.
#self.putheader('Connection', 'TE')
else:
# For HTTP/1.0, the server will assume "not chunked"
pass
def putheader(self, header, value):
"""Send a request header line to the server.
For example: h.putheader('Accept', 'text/html')
"""
str = '%s: %s\r\n' % (header, value)
self.send(str)
def endheaders(self):
"""Indicate that the last header line has been sent to the server."""
self.send('\r\n')
def request(self, method, url='/', body=None, headers={}):
"""Send a complete request to the server."""
self.putrequest(method, url)
if body:
self.putheader('Content-Length', str(len(body)))
for hdr, value in headers.items():
self.putheader(hdr, value)
self.endheaders()
if body:
self.send(body)
def getreply(self):
"""Get a reply from the server.
Returns a tuple consisting of:
- server response code (e.g. '200' if all goes well)
- server response string corresponding to response code
- any RFC822 headers in the response from the server
"""
file = self.sock.makefile('rb')
line = file.readline()
try:
[ver, code, msg] = string.split(line, None, 2)
except ValueError:
try:
[ver, code] = string.split(line, None, 1)
msg = ""
except ValueError:
self.close()
return -1, line, file
if ver[:5] != 'HTTP/':
self.close()
return -1, line, file
errcode = int(code)
errmsg = string.strip(msg)
response = self.response_class(file, ver, errcode)
if response.will_close:
# this effectively passes the connection to the response
self.close()
else:
# remember this, so we can tell when it is complete
self.response = response
return errcode, errmsg, response
class HTTP(HTTPConnection):
    """HTTP/1.0 connection offering the interface of httplib.py from 1.5."""

    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'

    def __init__(self, host='', port=None):
        """Create the connection object; `host' is optional here.

        The superclass insists on a host.  An empty one is accepted and
        only causes an error when a connection is actually attempted, so
        client code is expected to call connect() with a real host first.
        """
        HTTPConnection.__init__(self, host, port)
        self.debuglevel = 0

    def connect(self, host=None, port=None):
        """Connect, optionally replacing the stored host/port first."""
        if host:
            self._set_hostport(host, port)
        HTTPConnection.connect(self)

    def set_debuglevel(self, debuglevel):
        """Store the requested debug level on the instance."""
        self.debuglevel = debuglevel

    def getfile(self):
        """Return the file object recorded by the last getreply() call."""
        return self.file

    def putheader(self, header, *values):
        """Send one header; extra values become continuation lines."""
        folded = string.join(values, '\r\n\t')
        HTTPConnection.putheader(self, header, folded)

    def getreply(self):
        """Fetch the reply and expose it as self.headers and self.file.

        Returns (errcode, errmsg, headers); `headers' is None when the
        superclass reported a failure (errcode -1).
        """
        code, message, reply = HTTPConnection.getreply(self)
        if code != -1:
            self.headers = reply
            self.file = reply.fp
            return code, message, reply
        # on failure the third item is the raw file object, not a response
        self.file = reply
        self.headers = None
        return -1, message, None
def _test():
h = HTTP('www.siemens.de')
h.putrequest("GET")
h.putheader("Host", 'www.siemens.de')
h.endheaders()
status,text,reply = h.getreply()
print status,text,reply
# run the smoke test when this module is executed as a script
if __name__ == '__main__':
    _test()