mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-16 22:10:26 +00:00
Update cookie code from Python module.
This commit is contained in:
parent
65fa0a8289
commit
36badddfac
4 changed files with 187 additions and 37 deletions
6
linkcheck/cache/cookie.py
vendored
6
linkcheck/cache/cookie.py
vendored
|
|
@ -38,8 +38,9 @@ class CookieJar (object):
|
|||
errors = []
|
||||
for h in headers.getallmatchingheaders("Set-Cookie"):
|
||||
# RFC 2109 (Netscape) cookie type
|
||||
name, value = h.split(':', 1)
|
||||
try:
|
||||
cookie = cookies.NetscapeCookie(h, scheme, host, path)
|
||||
cookie = cookies.NetscapeCookie(value, scheme, host, path)
|
||||
if cookie in self.cache:
|
||||
self.cache.remove(cookie)
|
||||
if not cookie.is_expired():
|
||||
|
|
@ -50,8 +51,9 @@ class CookieJar (object):
|
|||
errors.append(errmsg)
|
||||
for h in headers.getallmatchingheaders("Set-Cookie2"):
|
||||
# RFC 2965 cookie type
|
||||
name, value = h.split(':', 1)
|
||||
try:
|
||||
cookie = cookies.Rfc2965Cookie(h, scheme, host, path)
|
||||
cookie = cookies.Rfc2965Cookie(value, scheme, host, path)
|
||||
if cookie in self.cache:
|
||||
self.cache.remove(cookie)
|
||||
if not cookie.is_expired():
|
||||
|
|
|
|||
|
|
@ -28,40 +28,172 @@ And a cookie storage class is provided.
|
|||
"""
|
||||
|
||||
import time
|
||||
import string
|
||||
import re
|
||||
import Cookie
|
||||
import cookielib
|
||||
import httplib
|
||||
from cStringIO import StringIO
|
||||
from . import strformat
|
||||
|
||||
|
||||
_nulljoin = ''.join
|
||||
_semispacejoin = '; '.join
|
||||
_spacejoin = ' '.join
|
||||
|
||||
class CookieError (StandardError):
|
||||
"""Thrown for invalid cookie syntax or conflicting/impossible values."""
|
||||
pass
|
||||
|
||||
_LegalChars = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~:"
|
||||
_Translator = {
|
||||
'\000' : '\\000', '\001' : '\\001', '\002' : '\\002',
|
||||
'\003' : '\\003', '\004' : '\\004', '\005' : '\\005',
|
||||
'\006' : '\\006', '\007' : '\\007', '\010' : '\\010',
|
||||
'\011' : '\\011', '\012' : '\\012', '\013' : '\\013',
|
||||
'\014' : '\\014', '\015' : '\\015', '\016' : '\\016',
|
||||
'\017' : '\\017', '\020' : '\\020', '\021' : '\\021',
|
||||
'\022' : '\\022', '\023' : '\\023', '\024' : '\\024',
|
||||
'\025' : '\\025', '\026' : '\\026', '\027' : '\\027',
|
||||
'\030' : '\\030', '\031' : '\\031', '\032' : '\\032',
|
||||
'\033' : '\\033', '\034' : '\\034', '\035' : '\\035',
|
||||
'\036' : '\\036', '\037' : '\\037',
|
||||
|
||||
# Because of the way browsers really handle cookies (as opposed
|
||||
# to what the RFC says) we also encode , and ;
|
||||
|
||||
',' : '\\054', ';' : '\\073',
|
||||
|
||||
'"' : '\\"', '\\' : '\\\\',
|
||||
|
||||
'\177' : '\\177', '\200' : '\\200', '\201' : '\\201',
|
||||
'\202' : '\\202', '\203' : '\\203', '\204' : '\\204',
|
||||
'\205' : '\\205', '\206' : '\\206', '\207' : '\\207',
|
||||
'\210' : '\\210', '\211' : '\\211', '\212' : '\\212',
|
||||
'\213' : '\\213', '\214' : '\\214', '\215' : '\\215',
|
||||
'\216' : '\\216', '\217' : '\\217', '\220' : '\\220',
|
||||
'\221' : '\\221', '\222' : '\\222', '\223' : '\\223',
|
||||
'\224' : '\\224', '\225' : '\\225', '\226' : '\\226',
|
||||
'\227' : '\\227', '\230' : '\\230', '\231' : '\\231',
|
||||
'\232' : '\\232', '\233' : '\\233', '\234' : '\\234',
|
||||
'\235' : '\\235', '\236' : '\\236', '\237' : '\\237',
|
||||
'\240' : '\\240', '\241' : '\\241', '\242' : '\\242',
|
||||
'\243' : '\\243', '\244' : '\\244', '\245' : '\\245',
|
||||
'\246' : '\\246', '\247' : '\\247', '\250' : '\\250',
|
||||
'\251' : '\\251', '\252' : '\\252', '\253' : '\\253',
|
||||
'\254' : '\\254', '\255' : '\\255', '\256' : '\\256',
|
||||
'\257' : '\\257', '\260' : '\\260', '\261' : '\\261',
|
||||
'\262' : '\\262', '\263' : '\\263', '\264' : '\\264',
|
||||
'\265' : '\\265', '\266' : '\\266', '\267' : '\\267',
|
||||
'\270' : '\\270', '\271' : '\\271', '\272' : '\\272',
|
||||
'\273' : '\\273', '\274' : '\\274', '\275' : '\\275',
|
||||
'\276' : '\\276', '\277' : '\\277', '\300' : '\\300',
|
||||
'\301' : '\\301', '\302' : '\\302', '\303' : '\\303',
|
||||
'\304' : '\\304', '\305' : '\\305', '\306' : '\\306',
|
||||
'\307' : '\\307', '\310' : '\\310', '\311' : '\\311',
|
||||
'\312' : '\\312', '\313' : '\\313', '\314' : '\\314',
|
||||
'\315' : '\\315', '\316' : '\\316', '\317' : '\\317',
|
||||
'\320' : '\\320', '\321' : '\\321', '\322' : '\\322',
|
||||
'\323' : '\\323', '\324' : '\\324', '\325' : '\\325',
|
||||
'\326' : '\\326', '\327' : '\\327', '\330' : '\\330',
|
||||
'\331' : '\\331', '\332' : '\\332', '\333' : '\\333',
|
||||
'\334' : '\\334', '\335' : '\\335', '\336' : '\\336',
|
||||
'\337' : '\\337', '\340' : '\\340', '\341' : '\\341',
|
||||
'\342' : '\\342', '\343' : '\\343', '\344' : '\\344',
|
||||
'\345' : '\\345', '\346' : '\\346', '\347' : '\\347',
|
||||
'\350' : '\\350', '\351' : '\\351', '\352' : '\\352',
|
||||
'\353' : '\\353', '\354' : '\\354', '\355' : '\\355',
|
||||
'\356' : '\\356', '\357' : '\\357', '\360' : '\\360',
|
||||
'\361' : '\\361', '\362' : '\\362', '\363' : '\\363',
|
||||
'\364' : '\\364', '\365' : '\\365', '\366' : '\\366',
|
||||
'\367' : '\\367', '\370' : '\\370', '\371' : '\\371',
|
||||
'\372' : '\\372', '\373' : '\\373', '\374' : '\\374',
|
||||
'\375' : '\\375', '\376' : '\\376', '\377' : '\\377'
|
||||
}
|
||||
|
||||
def quote(str, LegalChars=_LegalChars):
    r"""Return ``str`` in a form usable as a cookie header value.

    A string made up solely of characters from ``LegalChars`` is
    returned unchanged.  Any other string is wrapped in double quotes,
    and every character that has an entry in ``_Translator`` is replaced
    by that entry (a backslash escape such as \" or a \ooo octal code).
    """
    # NOTE: the parameter name ``str`` shadows the builtin; it is kept
    # because it is part of the public signature.
    for char in str:
        if char not in LegalChars:
            break
    else:
        # Every character is legal (this also covers the empty string).
        return str
    escaped = [_Translator.get(char, char) for char in str]
    return '"' + _nulljoin(escaped) + '"'
|
||||
|
||||
|
||||
_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
|
||||
_QuotePatt = re.compile(r"[\\].")
|
||||
|
||||
# Matches one escape sequence inside a quoted cookie value: a backslash
# followed by either a three-digit octal code (group 1) or any single
# character other than a newline (group 2).  The octal alternative is
# tried first, so "\012" decodes to a newline rather than to "0".
_UnquoteSub = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub


def _unquote_replace(match):
    """Return the character that one matched escape sequence stands for."""
    octal = match.group(1)
    if octal:
        return chr(int(octal, 8))
    return match.group(2)


def unquote(str):
    r"""Undo the quoting applied to a cookie value.

    A string that is shorter than two characters, or that is not
    enclosed in double quotes, is returned unchanged: without the
    surrounding quotes there cannot be any escaped characters (see
    RFC 2109).  Otherwise the surrounding quotes are removed and the
    escape sequences are decoded, for example:

        \012 --> newline
        \"   --> "

    A backslash that is not followed by a character to escape (a
    trailing backslash, or one directly before a newline) is kept as is.

    The decoding is done in a single regular-expression pass.  Scanning
    with two separate searches per step, restarting from the current
    position each time, takes quadratic time on values that consist
    mostly of backslash escapes.
    """
    # NOTE: the parameter name ``str`` shadows the builtin; it is kept
    # because it is part of the public signature.
    if len(str) < 2:
        return str
    if str[0] != '"' or str[-1] != '"':
        return str
    # Strip the enclosing quotes, then decode all escapes left to right.
    return _UnquoteSub(_unquote_replace, str[1:-1])
|
||||
|
||||
|
||||
unquote = Cookie._unquote
|
||||
quote = Cookie._quote
|
||||
has_embedded_dot = re.compile(r"[a-zA-Z0-9]\.[a-zA-Z]").search
|
||||
|
||||
|
||||
# Pattern for finding cookies, taken from Python's Cookie.py
|
||||
# Modification: allow whitespace in values.
|
||||
LegalChars = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
|
||||
CookiePattern = re.compile(r"""
|
||||
(?P<key> # Start of group 'key'
|
||||
[%(legalchars)s]+? # Any word of at least one letter, nongreedy
|
||||
) # End of group 'key'
|
||||
\s*=\s* # Equal Sign
|
||||
(?P<val> # Start of group 'val'
|
||||
"(?:[^\\"]|\\.)*" # Any doublequoted string
|
||||
| # or
|
||||
[%(legalchars)s\s]* # Any word or empty string
|
||||
) # End of group 'val'
|
||||
\s*;? # Probably ending in a semi-colon
|
||||
""" % {"legalchars": LegalChars}, re.VERBOSE)
|
||||
|
||||
_LegalCharsPatt = r"[\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=]"
|
||||
_CookiePattern = re.compile(r"""
|
||||
(?x) # This is a verbose pattern
|
||||
(?P<key> # Start of group 'key'
|
||||
""" + _LegalCharsPatt + r"""+? # Any word of at least one letter
|
||||
) # End of group 'key'
|
||||
( # Optional group: there may not be a value.
|
||||
\s*=\s* # Equal Sign
|
||||
(?P<val> # Start of group 'val'
|
||||
"(?:[^\\"]|\\.)*" # Any doublequoted string
|
||||
| # or
|
||||
\w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT # Special case for "expires" attr
|
||||
| # or
|
||||
""" + _LegalCharsPatt + r"""* # Any word or empty string
|
||||
) # End of group 'val'
|
||||
)? # End of optional value group
|
||||
\s* # Any number of spaces.
|
||||
(\s+|;|$) # Ending either at space, semicolon, or EOS.
|
||||
""")
|
||||
|
||||
class HttpCookie (object):
|
||||
"""A cookie consists of one name-value pair with attributes.
|
||||
|
|
@ -84,6 +216,8 @@ class HttpCookie (object):
|
|||
"commenturl": "CommentURL",
|
||||
"discard": "Discard",
|
||||
"port": "Port",
|
||||
# httponly to protect against XSS attacks
|
||||
"httponly": "httponly",
|
||||
}
|
||||
|
||||
def __init__ (self, name, value, attributes=None):
|
||||
|
|
@ -203,7 +337,10 @@ class HttpCookie (object):
|
|||
key = key.lower()
|
||||
if key not in self.attribute_names:
|
||||
raise CookieError("invalid attribute %r" % key)
|
||||
value = unquote(value)
|
||||
if value:
|
||||
value = unquote(value)
|
||||
else:
|
||||
value = ""
|
||||
if key == "domain":
|
||||
value = value.lower()
|
||||
if not value.startswith(".") and not has_embedded_dot(value):
|
||||
|
|
@ -227,7 +364,7 @@ class HttpCookie (object):
|
|||
raise CookieError("invalid port number: %r" % port)
|
||||
self.attributes[key] = value
|
||||
|
||||
def parse (self, text, patt=CookiePattern):
|
||||
def parse (self, text, patt=_CookiePattern):
|
||||
"""Parse cookie data."""
|
||||
text = strformat.ascii_safe(text.rstrip('\r\n'))
|
||||
# reset values
|
||||
|
|
@ -246,6 +383,8 @@ class HttpCookie (object):
|
|||
# No more key-value pairs.
|
||||
break
|
||||
key, value = match.group("key"), match.group("val")
|
||||
if value is None:
|
||||
value = ""
|
||||
i = match.end()
|
||||
# Parse the key, value in case it's metainfo.
|
||||
if self.name is None:
|
||||
|
|
@ -430,6 +569,7 @@ def cookie_str(cookie):
|
|||
#if cookie.port_specified: h.append(("port_spec", None))
|
||||
#if cookie.domain_initial_dot: h.append(("domain_dot", None))
|
||||
if cookie.secure: h.append(("secure", None))
|
||||
if cookie.httponly: h.append(("httponly", None))
|
||||
if cookie.expires: h.append(("expires",
|
||||
time2isoz(float(cookie.expires))))
|
||||
if cookie.discard: h.append(("discard", None))
|
||||
|
|
|
|||
30
tests/cache/test_cookiejar.py
vendored
30
tests/cache/test_cookiejar.py
vendored
|
|
@ -39,33 +39,36 @@ class TestCookieJar (unittest.TestCase):
|
|||
jar = linkcheck.cache.cookie.CookieJar()
|
||||
data = (
|
||||
("Foo", "Bar"),
|
||||
("Domain", "example.org"),
|
||||
("Domain", host),
|
||||
("Path", "/"),
|
||||
)
|
||||
value = "; ".join('%s=%s' % (key, value) for key, value in data)
|
||||
headers = get_headers('Set-Cookie', value)
|
||||
jar.add(headers, scheme, host, path)
|
||||
errors = jar.add(headers, scheme, host, path)
|
||||
self.assertFalse(errors, str(errors))
|
||||
self.assertEqual(len(jar.cache), 1)
|
||||
# add updated cookie
|
||||
data = (
|
||||
("FOO", "Baz"),
|
||||
("Domain", "example.org"),
|
||||
("Domain", host),
|
||||
("Path", "/"),
|
||||
)
|
||||
value = "; ".join('%s=%s' % (key, value) for key, value in data)
|
||||
headers = get_headers('Set-Cookie', value)
|
||||
jar.add(headers, scheme, host, path)
|
||||
errors = jar.add(headers, scheme, host, path)
|
||||
self.assertFalse(errors, str(errors))
|
||||
self.assertEqual(len(jar.cache), 1)
|
||||
# remove cookie
|
||||
data = (
|
||||
("FOO", "Baz"),
|
||||
("Domain", "example.org"),
|
||||
("Domain", host),
|
||||
("Path", "/"),
|
||||
("Max-Age", "0"),
|
||||
)
|
||||
value = "; ".join('%s=%s' % (key, value) for key, value in data)
|
||||
headers = get_headers('Set-Cookie', value)
|
||||
jar.add(headers, scheme, host, path)
|
||||
errors = jar.add(headers, scheme, host, path)
|
||||
self.assertFalse(errors, str(errors))
|
||||
self.assertEqual(len(jar.cache), 0)
|
||||
|
||||
def test_cookie_cache2 (self):
|
||||
|
|
@ -75,31 +78,34 @@ class TestCookieJar (unittest.TestCase):
|
|||
jar = linkcheck.cache.cookie.CookieJar()
|
||||
data = (
|
||||
("Foo", "Bar"),
|
||||
("Domain", "example.org"),
|
||||
("Domain", host),
|
||||
("Path", "/"),
|
||||
)
|
||||
value = "; ".join('%s=%s' % (key, value) for key, value in data)
|
||||
headers = get_headers('Set-Cookie2', value)
|
||||
jar.add(headers, scheme, host, path)
|
||||
errors = jar.add(headers, scheme, host, path)
|
||||
self.assertFalse(errors, str(errors))
|
||||
self.assertEqual(len(jar.cache), 1)
|
||||
# add updated cookie
|
||||
data = (
|
||||
("Foo", "Baz"),
|
||||
("Domain", "EXAMPLE.org"),
|
||||
("Domain", host.upper()),
|
||||
("Path", "/"),
|
||||
)
|
||||
value = "; ".join('%s=%s' % (key, value) for key, value in data)
|
||||
headers = get_headers('Set-Cookie2', value)
|
||||
jar.add(headers, scheme, host, path)
|
||||
errors = jar.add(headers, scheme, host, path)
|
||||
self.assertFalse(errors, str(errors))
|
||||
self.assertEqual(len(jar.cache), 1)
|
||||
# remove cookie
|
||||
data = (
|
||||
("FOO", "Baz"),
|
||||
("Domain", "example.org"),
|
||||
("Domain", host),
|
||||
("Path", "/"),
|
||||
("Max-Age", "0"),
|
||||
)
|
||||
value = "; ".join('%s=%s' % (key, value) for key, value in data)
|
||||
headers = get_headers('Set-Cookie2', value)
|
||||
jar.add(headers, scheme, host, path)
|
||||
errors = jar.add(headers, scheme, host, path)
|
||||
self.assertFalse(errors, str(errors))
|
||||
self.assertEqual(len(jar.cache), 0)
|
||||
|
|
|
|||
|
|
@ -58,8 +58,9 @@ class TestCookies (unittest.TestCase):
|
|||
self.assertTrue(cookie.is_expired())
|
||||
|
||||
def test_netscape_cookie3 (self):
|
||||
# invalid port
|
||||
data = (
|
||||
("Foo", "Bar\""),
|
||||
("Foo", "Bar"),
|
||||
("Port", "hul,la"),
|
||||
)
|
||||
value = "; ".join('%s="%s"' % (key, value) for key, value in data)
|
||||
|
|
@ -71,7 +72,7 @@ class TestCookies (unittest.TestCase):
|
|||
|
||||
def test_netscape_cookie4 (self):
|
||||
data = (
|
||||
("Foo", "Bar\""),
|
||||
("Foo", "Bar"),
|
||||
("Domain", "localhost"),
|
||||
("Port", "100,555,76"),
|
||||
)
|
||||
|
|
@ -200,8 +201,9 @@ class TestCookies (unittest.TestCase):
|
|||
self.assertTrue(cookie.is_expired())
|
||||
|
||||
def test_rfc_cookie3 (self):
|
||||
# invalid port
|
||||
data = (
|
||||
("Foo", "Bar\""),
|
||||
("Foo", "Bar"),
|
||||
("Port", "hul,la"),
|
||||
)
|
||||
value = "; ".join('%s="%s"' % (key, value) for key, value in data)
|
||||
|
|
@ -213,7 +215,7 @@ class TestCookies (unittest.TestCase):
|
|||
|
||||
def test_rfc_cookie4 (self):
|
||||
data = (
|
||||
("Foo", "Bar\""),
|
||||
("Foo", "Bar"),
|
||||
("Port", "100,555,76"),
|
||||
)
|
||||
value = "; ".join('%s="%s"' % (key, value) for key, value in data)
|
||||
|
|
|
|||
Loading…
Reference in a new issue