mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-26 02:40:23 +00:00
Read complete body data on persistent connections, else subsequent requests could fail.
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@3568 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
2b94c0c161
commit
2edfaea03e
1 changed file with 26 additions and 19 deletions
|
|
@ -494,9 +494,13 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
self.url_connection.putheader(name, value)
|
||||
self.url_connection.endheaders()
|
||||
response = self.url_connection.getresponse()
|
||||
self.persistent = not response.will_close
|
||||
self.timeout = headers.http_timeout(response)
|
||||
self.headers = response.msg
|
||||
self.persistent = not response.will_close
|
||||
if self.persistent and (self.method == "GET" or
|
||||
self.headers.getheader("Content-Length", "")):
|
||||
# always read content from persistent connections
|
||||
self._read_content(response)
|
||||
# If possible, use official W3C HTTP response name
|
||||
if response.status in httpresponses:
|
||||
response.reason = httpresponses[response.status]
|
||||
|
|
@ -544,31 +548,34 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
if not self.has_content:
|
||||
self.method = "GET"
|
||||
self.close_connection()
|
||||
t = time.time()
|
||||
response = self._get_http_response()
|
||||
tries, response = self.follow_redirections(response,
|
||||
set_result=False)
|
||||
self.headers = response.msg
|
||||
self.data = response.read()
|
||||
encoding = headers.get_content_encoding(self.headers)
|
||||
if encoding in _supported_encodings:
|
||||
try:
|
||||
if encoding == 'deflate':
|
||||
f = StringIO.StringIO(zlib.decompress(self.data))
|
||||
else:
|
||||
f = linkcheck.gzip2.GzipFile('', 'rb', 9,
|
||||
StringIO.StringIO(self.data))
|
||||
except zlib.error, msg:
|
||||
self.add_warning(_("Decompress error %(err)s") %
|
||||
{"err": str(msg)},
|
||||
tag="http-decompress-error")
|
||||
f = StringIO.StringIO(self.data)
|
||||
self.data = f.read()
|
||||
self.downloadtime = time.time() - t
|
||||
self._read_content(response)
|
||||
response.close()
|
||||
self.has_content = True
|
||||
return self.data
|
||||
|
||||
def _read_content (self, response):
|
||||
t = time.time()
|
||||
self.data = response.read()
|
||||
encoding = headers.get_content_encoding(self.headers)
|
||||
if encoding in _supported_encodings:
|
||||
try:
|
||||
if encoding == 'deflate':
|
||||
f = StringIO.StringIO(zlib.decompress(self.data))
|
||||
else:
|
||||
f = linkcheck.gzip2.GzipFile('', 'rb', 9,
|
||||
StringIO.StringIO(self.data))
|
||||
except zlib.error, msg:
|
||||
self.add_warning(_("Decompress error %(err)s") %
|
||||
{"err": str(msg)},
|
||||
tag="http-decompress-error")
|
||||
f = StringIO.StringIO(self.data)
|
||||
self.data = f.read()
|
||||
self.downloadtime = time.time() - t
|
||||
self.has_content = True
|
||||
|
||||
def is_html (self):
|
||||
"""
|
||||
See if this URL points to a HTML file by looking at the
|
||||
|
|
|
|||
Loading…
Reference in a new issue