mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-15 03:41:01 +00:00
Fix buggy persistent HTTP connections
Workaround for buggy servers that break protocol synchronization of persistent HTTP connections.

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@3677 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
67e55d3832
commit
91a0aad5d8
2 changed files with 13 additions and 1 deletions
|
|
@@ -23,6 +23,11 @@
|
|||
Type: bugfix
|
||||
Changed: linkcheck/checker/mailtourl.py
|
||||
|
||||
* Workaround for buggy servers that break protocol synchronization of
|
||||
persistent HTTP connections.
|
||||
Changed: linkcheck/checker/httpurl.py
|
||||
Closes: SF bug #1913992
|
||||
|
||||
4.8 "Hallam Foe" (released 16.12.2007)
|
||||
|
||||
* Fix message typo for not disclosing information.
|
||||
|
|
|
|||
|
|
@@ -502,9 +502,16 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
self.headers = response.msg
|
||||
self.persistent = not response.will_close
|
||||
if self.persistent and (self.method == "GET" or
|
||||
self.headers.getheader("Content-Length", "")):
|
||||
self.headers.getheader("Content-Length") != "0"):
|
||||
# always read content from persistent connections
|
||||
self._read_content(response)
|
||||
if self.persistent and self.method == "HEAD":
|
||||
# Some servers send page content after a HEAD request,
|
||||
# but only after making the *next* request. This breaks
|
||||
# protocol synchronisation. Workaround here is to close
|
||||
# the connection after HEAD.
|
||||
# Example: http://www.empleo.gob.mx (Apache/1.3.33 (Unix) mod_jk)
|
||||
self.persistent = False
|
||||
# If possible, use official W3C HTTP response name
|
||||
if response.status in httpresponses:
|
||||
response.reason = httpresponses[response.status]
|
||||
|
|
|
|||
Loading…
Reference in a new issue