mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-10 07:34:44 +00:00
support content-encoding: deflate
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@524 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
407a770757
commit
0579ba6988
2 changed files with 16 additions and 8 deletions
|
|
@ -1,6 +1,6 @@
|
|||
1.6.2:
|
||||
* Warn about unknown Content-Encodings. We only support gzip at the
|
||||
moment.
|
||||
* Warn about unknown Content-Encodings.
|
||||
* Support deflate content encoding (snatched from Debian's reportbug)
|
||||
|
||||
1.6.1:
|
||||
* FileUrlData.py: remove searching for links in text files, this is
|
||||
|
|
|
|||
|
|
@ -23,6 +23,7 @@ from UrlData import UrlData
|
|||
from urllib import splittype, splithost, splituser, splitpasswd
|
||||
from debuglevels import *
|
||||
|
||||
_supported_encodings = ('gzip', 'x-gzip', 'deflate')
|
||||
|
||||
class HttpUrlData (UrlData):
|
||||
"Url link with http scheme"
|
||||
|
|
@ -232,6 +233,7 @@ class HttpUrlData (UrlData):
|
|||
if self.parentName:
|
||||
self.urlConnection.putheader("Referer", self.parentName)
|
||||
self.urlConnection.putheader("User-Agent", Config.UserAgent)
|
||||
self.urlConnection.putheader("Accept-Encoding", "gzip;q=1.0, deflate;q=0.9, identity;q=0.5")
|
||||
if self.config['cookies']:
|
||||
self.cookies = self.config.getCookies(self.urlTuple[1],
|
||||
self.urlTuple[2])
|
||||
|
|
@ -254,10 +256,15 @@ class HttpUrlData (UrlData):
|
|||
status, statusText, self.headers = self._getHttpRequest("GET")
|
||||
self.urlConnection = self.urlConnection.getfile()
|
||||
self.data = self.urlConnection.read()
|
||||
if self.headers.get("Content-Encoding")=="gzip":
|
||||
import gzip, cStringIO
|
||||
f = gzip.GzipFile(filename="", mode="rb",
|
||||
fileobj=cStringIO.StringIO(self.data))
|
||||
encoding = self.headers.get("Content-Encoding")
|
||||
if encoding in _supported_encodings:
|
||||
from cStringIO import StringIO
|
||||
if encoding == 'deflate':
|
||||
import zlib
|
||||
f = StringIO(zlib.decompress(self.data))
|
||||
else:
|
||||
import gzip
|
||||
f = gzip.GzipFile('', 'rb', 9, StringIO(self.data))
|
||||
self.data = f.read()
|
||||
self.downloadtime = time.time() - t
|
||||
self.init_html_comments()
|
||||
|
|
@ -270,8 +277,9 @@ class HttpUrlData (UrlData):
|
|||
if self.headers.gettype()[:9]!="text/html":
|
||||
return 0
|
||||
encoding = self.headers.get("Content-Encoding")
|
||||
if encoding and encoding!='gzip':
|
||||
self.setWarning(linkcheck._('Unsupported content encoding %s.')%`encoding`)
|
||||
if encoding and encoding not in _supported_encodings:
|
||||
self.setWarning(linkcheck._('Unsupported content encoding %s.')%\
|
||||
`encoding`)
|
||||
return 0
|
||||
return 1
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue