Read complete body data on persistent connections, else subsequent requests could fail.

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@3568 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2007-08-08 19:33:10 +00:00
parent 2b94c0c161
commit 2edfaea03e

View file

@ -494,9 +494,13 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
self.url_connection.putheader(name, value)
self.url_connection.endheaders()
response = self.url_connection.getresponse()
self.persistent = not response.will_close
self.timeout = headers.http_timeout(response)
self.headers = response.msg
self.persistent = not response.will_close
if self.persistent and (self.method == "GET" or
self.headers.getheader("Content-Length", "")):
# always read content from persistent connections
self._read_content(response)
# If possible, use official W3C HTTP response name
if response.status in httpresponses:
response.reason = httpresponses[response.status]
@ -544,31 +548,34 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
if not self.has_content:
self.method = "GET"
self.close_connection()
t = time.time()
response = self._get_http_response()
tries, response = self.follow_redirections(response,
set_result=False)
self.headers = response.msg
self.data = response.read()
encoding = headers.get_content_encoding(self.headers)
if encoding in _supported_encodings:
try:
if encoding == 'deflate':
f = StringIO.StringIO(zlib.decompress(self.data))
else:
f = linkcheck.gzip2.GzipFile('', 'rb', 9,
StringIO.StringIO(self.data))
except zlib.error, msg:
self.add_warning(_("Decompress error %(err)s") %
{"err": str(msg)},
tag="http-decompress-error")
f = StringIO.StringIO(self.data)
self.data = f.read()
self.downloadtime = time.time() - t
self._read_content(response)
response.close()
self.has_content = True
return self.data
def _read_content (self, response):
t = time.time()
self.data = response.read()
encoding = headers.get_content_encoding(self.headers)
if encoding in _supported_encodings:
try:
if encoding == 'deflate':
f = StringIO.StringIO(zlib.decompress(self.data))
else:
f = linkcheck.gzip2.GzipFile('', 'rb', 9,
StringIO.StringIO(self.data))
except zlib.error, msg:
self.add_warning(_("Decompress error %(err)s") %
{"err": str(msg)},
tag="http-decompress-error")
f = StringIO.StringIO(self.data)
self.data = f.read()
self.downloadtime = time.time() - t
self.has_content = True
def is_html (self):
"""
See if this URL points to a HTML file by looking at the