mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-21 16:30:28 +00:00
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@79 e7d03fd6-7b0d-0410-9947-9c21f3af8025
394 lines
12 KiB
Python
394 lines
12 KiB
Python
#
|
|
# HTTP/1.1 client library
|
|
#
|
|
# Copyright (C) 1998-1999 Guido van Rossum. All Rights Reserved.
|
|
# Written by Greg Stein. Given to Guido. Licensed using the Python license.
|
|
#
|
|
# This module is maintained by Greg and is available at:
|
|
# http://www.lyra.org/greg/python/httplib.py
|
|
#
|
|
# Since this isn't in the Python distribution yet, we'll use the CVS ID
|
|
# for tracking:
|
|
# $Id$
|
|
#
|
|
# Modified by Bastian Kleineidam to squish a bug.
|
|
|
|
import errno
import httplib
import mimetools
import socket
import string
|
|
|
|
|
|
class error(Exception):
    """Raised for protocol-level problems detected by this module.

    This used to be a string exception (``__name__ + '.error'``).  String
    exceptions are deprecated and rejected by newer interpreters, so the
    name is now bound to a real exception class.  Existing code keeps
    working unchanged: ``raise error, 'message'`` instantiates the class
    with the message, and ``except error:`` catches it.
    """


# Default TCP port used when neither the `port' argument nor a
# "host:port" string supplies one.
HTTP_PORT = 80
|
|
|
|
class HTTPResponse(mimetools.Message):
|
|
def __init__(self, fp, version, errcode):
|
|
mimetools.Message.__init__(self, fp, 0)
|
|
|
|
if version == 'HTTP/1.0':
|
|
self.version = 10
|
|
elif version[:7] == 'HTTP/1.':
|
|
self.version = 11 # use HTTP/1.1 code for HTTP/1.x where x>=1
|
|
else:
|
|
raise error, 'unknown HTTP protocol'
|
|
|
|
# are we using the chunked-style of transfer encoding?
|
|
tr_enc = self.getheader('transfer-encoding')
|
|
if tr_enc:
|
|
if string.lower(tr_enc) != 'chunked':
|
|
raise error, 'unknown transfer-encoding'
|
|
self.chunked = 1
|
|
self.chunk_left = None
|
|
else:
|
|
self.chunked = 0
|
|
|
|
# will the connection close at the end of the response?
|
|
conn = self.getheader('connection')
|
|
if conn:
|
|
conn = string.lower(conn)
|
|
# a "Connection: close" will always close the connection. if we
|
|
# don't see that and this is not HTTP/1.1, then the connection will
|
|
# close unless we see a Keep-Alive header.
|
|
self.will_close = string.find(conn, 'close') != -1 or \
|
|
( self.version != 11 and \
|
|
not self.getheader('keep-alive') )
|
|
else:
|
|
# for HTTP/1.1, the connection will always remain open
|
|
# otherwise, it will remain open IFF we see a Keep-Alive header
|
|
self.will_close = self.version != 11 and \
|
|
not self.getheader('keep-alive')
|
|
|
|
# do we have a Content-Length?
|
|
# NOTE: RFC 2616, S4.4, #3 states we ignore this if tr_enc is "chunked"
|
|
length = self.getheader('content-length')
|
|
if length and not self.chunked:
|
|
self.length = int(length)
|
|
else:
|
|
self.length = None
|
|
|
|
# does the body have a fixed length? (of zero)
|
|
if (errcode == 204 or # No Content
|
|
errcode == 304 or # Not Modified
|
|
100 <= errcode < 200): # 1xx codes
|
|
self.length = 0
|
|
|
|
# if the connection remains open, and we aren't using chunked, and
|
|
# a content-length was not provided, then assume that the connection
|
|
# WILL close.
|
|
if not self.will_close and \
|
|
not self.chunked and \
|
|
self.length is None:
|
|
self.will_close = 1
|
|
|
|
|
|
def close(self):
|
|
if self.fp:
|
|
self.fp.close()
|
|
self.fp = None
|
|
|
|
|
|
def isclosed(self):
|
|
# NOTE: it is possible that we will not ever call self.close(). This
|
|
# case occurs when will_close is TRUE, length is None, and we
|
|
# read up to the last byte, but NOT past it.
|
|
#
|
|
# IMPLIES: if will_close is FALSE, then self.close() will ALWAYS be
|
|
# called, meaning self.isclosed() is meaningful.
|
|
return self.fp is None
|
|
|
|
|
|
def read(self, amt=None):
|
|
if not self.fp:
|
|
return ''
|
|
|
|
if self.chunked:
|
|
chunk_left = self.chunk_left
|
|
value = ''
|
|
while 1:
|
|
if not chunk_left:
|
|
line = self.fp.readline()
|
|
i = string.find(line, ';')
|
|
if i >= 0:
|
|
line = line[:i] # strip chunk-extensions
|
|
chunk_left = string.atoi(line, 16)
|
|
if chunk_left == 0:
|
|
break
|
|
if not amt:
|
|
value = value + self.fp.read(chunk_left)
|
|
elif amt < chunk_left:
|
|
value = value + self.fp.read(amt)
|
|
self.chunk_left = chunk_left - amt
|
|
return value
|
|
elif amt == chunk_left:
|
|
value = value + self.fp.read(amt)
|
|
self.fp.read(2) # toss the CRLF at the end of the chunk
|
|
self.chunk_left = None
|
|
return value
|
|
else:
|
|
value = value + self.fp.read(chunk_left)
|
|
amt = amt - chunk_left
|
|
|
|
# we read the whole chunk, get another
|
|
self.fp.read(2) # toss the CRLF at the end of the chunk
|
|
chunk_left = None
|
|
|
|
# read and discard trailer up to the CRLF terminator
|
|
### note: we shouldn't have any trailers!
|
|
while 1:
|
|
line = self.fp.readline()
|
|
if line == '\r\n':
|
|
break
|
|
|
|
# we read everything; close the "file"
|
|
self.close()
|
|
|
|
return value
|
|
|
|
elif not amt:
|
|
# unbounded read
|
|
if self.will_close:
|
|
s = self.fp.read()
|
|
else:
|
|
s = self.fp.read(self.length)
|
|
self.close() # we read everything
|
|
return s
|
|
|
|
if self.length is not None:
|
|
if amt > self.length:
|
|
# clip the read to the "end of response"
|
|
amt = self.length
|
|
self.length = self.length - amt
|
|
|
|
s = self.fp.read(amt)
|
|
|
|
# close our "file" if we know we should
|
|
### I'm not sure about the len(s) < amt part; we should be safe because
|
|
### we shouldn't be using non-blocking sockets
|
|
if self.length == 0 or len(s) < amt:
|
|
self.close()
|
|
|
|
return s
|
|
|
|
|
|
class HTTPConnection:
|
|
|
|
_http_vsn = 11
|
|
_http_vsn_str = 'HTTP/1.1'
|
|
|
|
response_class = HTTPResponse
|
|
|
|
def __init__(self, host, port=None):
|
|
self.sock = None
|
|
self.response = None
|
|
self._set_hostport(host, port)
|
|
|
|
def _set_hostport(self, host, port):
|
|
if port is None:
|
|
i = string.find(host, ':')
|
|
if i >= 0:
|
|
port = int(host[i+1:])
|
|
host = host[:i]
|
|
else:
|
|
port = HTTP_PORT
|
|
self.host = host
|
|
self.port = port
|
|
|
|
def connect(self):
|
|
"""Connect to the host and port specified in __init__."""
|
|
self.sock = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
|
self.sock.connect(self.host, self.port)
|
|
|
|
def close(self):
|
|
"""Close the connection to the HTTP server."""
|
|
if self.sock:
|
|
self.sock.close() # close it manually... there may be other refs
|
|
self.sock = None
|
|
if self.response:
|
|
self.response.close()
|
|
self.response = None
|
|
|
|
def send(self, str):
|
|
"""Send `str' to the server."""
|
|
if not self.sock:
|
|
self.connect()
|
|
|
|
# send the data to the server. if we get a broken pipe, then close
|
|
# the socket. we want to reconnect when somebody tries to send again.
|
|
#
|
|
# NOTE: we DO propagate the error, though, because we cannot simply
|
|
# ignore the error... the caller will know if they can retry.
|
|
try:
|
|
self.sock.send(str)
|
|
except socket.error, v:
|
|
if v[0] == 32: # Broken pipe
|
|
self.close()
|
|
raise
|
|
|
|
def putrequest(self, method, url='/'):
|
|
"""Send a request to the server.
|
|
|
|
`method' specifies an HTTP request method, e.g. 'GET'.
|
|
`url' specifies the object being requested, e.g.
|
|
'/index.html'.
|
|
"""
|
|
if self.response:
|
|
if not self.response.isclosed():
|
|
### implies half-duplex!
|
|
raise error, 'prior response has not been fully handled'
|
|
self.response = None
|
|
|
|
if not url:
|
|
url = '/'
|
|
str = '%s %s %s\r\n' % (method, url, self._http_vsn_str)
|
|
|
|
try:
|
|
self.send(str)
|
|
except socket.error, v:
|
|
if v[0] != 32: # Broken pipe
|
|
raise
|
|
# try one more time (the socket was closed; this will reopen)
|
|
self.send(str)
|
|
|
|
#self.putheader('Host', self.host)
|
|
|
|
if self._http_vsn == 11:
|
|
# Issue some standard headers for better HTTP/1.1 compliance
|
|
|
|
# note: we are assuming that clients will not attempt to set these
|
|
# headers since *this* library must deal with the consequences.
|
|
# this also means that when the supporting libraries are
|
|
# updated to recognize other forms, then this code should be
|
|
# changed (removed or updated).
|
|
|
|
# we only want a Content-Encoding of "identity" since we don't
|
|
# support encodings such as x-gzip or x-deflate.
|
|
self.putheader('Accept-Encoding', 'identity')
|
|
|
|
# we can accept "chunked" Transfer-Encodings, but no others
|
|
# NOTE: no TE header implies *only* "chunked"
|
|
#self.putheader('TE', 'chunked')
|
|
|
|
# if TE is supplied in the header, then it must appear in a
|
|
# Connection header.
|
|
#self.putheader('Connection', 'TE')
|
|
|
|
else:
|
|
# For HTTP/1.0, the server will assume "not chunked"
|
|
pass
|
|
|
|
def putheader(self, header, value):
|
|
"""Send a request header line to the server.
|
|
|
|
For example: h.putheader('Accept', 'text/html')
|
|
"""
|
|
str = '%s: %s\r\n' % (header, value)
|
|
self.send(str)
|
|
|
|
def endheaders(self):
|
|
"""Indicate that the last header line has been sent to the server."""
|
|
|
|
self.send('\r\n')
|
|
|
|
def request(self, method, url='/', body=None, headers={}):
|
|
"""Send a complete request to the server."""
|
|
|
|
self.putrequest(method, url)
|
|
|
|
if body:
|
|
self.putheader('Content-Length', str(len(body)))
|
|
for hdr, value in headers.items():
|
|
self.putheader(hdr, value)
|
|
self.endheaders()
|
|
|
|
if body:
|
|
self.send(body)
|
|
|
|
def getreply(self):
|
|
"""Get a reply from the server.
|
|
|
|
Returns a tuple consisting of:
|
|
- server response code (e.g. '200' if all goes well)
|
|
- server response string corresponding to response code
|
|
- any RFC822 headers in the response from the server
|
|
|
|
"""
|
|
file = self.sock.makefile('rb')
|
|
line = file.readline()
|
|
try:
|
|
[ver, code, msg] = string.split(line, None, 2)
|
|
except ValueError:
|
|
try:
|
|
[ver, code] = string.split(line, None, 1)
|
|
msg = ""
|
|
except ValueError:
|
|
self.close()
|
|
return -1, line, file
|
|
if ver[:5] != 'HTTP/':
|
|
self.close()
|
|
return -1, line, file
|
|
errcode = int(code)
|
|
errmsg = string.strip(msg)
|
|
response = self.response_class(file, ver, errcode)
|
|
if response.will_close:
|
|
# this effectively passes the connection to the response
|
|
self.close()
|
|
else:
|
|
# remember this, so we can tell when it is complete
|
|
self.response = response
|
|
return errcode, errmsg, response
|
|
|
|
|
|
class HTTP(HTTPConnection):
    "Compatibility class with httplib.py from 1.5."

    _http_vsn = 10
    _http_vsn_str = 'HTTP/1.0'

    def __init__(self, host='', port=None):
        "Provide a default host, since the superclass requires one."

        # Note that we may pass an empty string as the host; this will throw
        # an error when we attempt to connect. Presumably, the client code
        # will call connect before then, with a proper host.
        HTTPConnection.__init__(self, host, port)
        self.debuglevel = 0
        # Filled in by getreply(); initialised here so that getfile() and
        # readers of `headers' do not hit an AttributeError beforehand.
        self.file = None
        self.headers = None

    def connect(self, host=None, port=None):
        "Accept arguments to set the host/port, since the superclass doesn't."

        if host:
            self._set_hostport(host, port)
        HTTPConnection.connect(self)

    def set_debuglevel(self, debuglevel):
        "Store the debug level; nothing in this class reads it."
        self.debuglevel = debuglevel

    def getfile(self):
        "Provide a getfile, since the superclass' use of HTTP/1.1 prevents it."
        # None until getreply() has been called
        return self.file

    def putheader(self, header, *values):
        "The superclass allows only one value argument."
        # several values become one header with continuation lines
        HTTPConnection.putheader(self, header,
                                 string.joinfields(values, '\r\n\t'))

    def getreply(self):
        "Compensate for an instance attribute shuffling."
        errcode, errmsg, response = HTTPConnection.getreply(self)
        if errcode == -1:
            self.file = response    # response is the "file" when errcode==-1
            self.headers = None
            return -1, errmsg, None

        self.headers = response
        self.file = response.fp
        return errcode, errmsg, response
|
|
|
|
def _test():
|
|
h = HTTP('www.siemens.de')
|
|
h.putrequest("GET")
|
|
h.putheader("Host", 'www.siemens.de')
|
|
h.endheaders()
|
|
status,text,reply = h.getreply()
|
|
print status,text,reply
|
|
|
|
# Run the smoke test when this file is executed as a script.
if __name__ == "__main__":
    _test()
|