Update cookie code from Python module.

This commit is contained in:
Bastian Kleineidam 2013-12-04 19:05:08 +01:00
parent 65fa0a8289
commit 36badddfac
4 changed files with 187 additions and 37 deletions

View file

@ -38,8 +38,9 @@ class CookieJar (object):
errors = []
for h in headers.getallmatchingheaders("Set-Cookie"):
# RFC 2109 (Netscape) cookie type
name, value = h.split(':', 1)
try:
cookie = cookies.NetscapeCookie(h, scheme, host, path)
cookie = cookies.NetscapeCookie(value, scheme, host, path)
if cookie in self.cache:
self.cache.remove(cookie)
if not cookie.is_expired():
@ -50,8 +51,9 @@ class CookieJar (object):
errors.append(errmsg)
for h in headers.getallmatchingheaders("Set-Cookie2"):
# RFC 2965 cookie type
name, value = h.split(':', 1)
try:
cookie = cookies.Rfc2965Cookie(h, scheme, host, path)
cookie = cookies.Rfc2965Cookie(value, scheme, host, path)
if cookie in self.cache:
self.cache.remove(cookie)
if not cookie.is_expired():

View file

@ -28,40 +28,172 @@ And a cookie storage class is provided.
"""
import time
import string
import re
import Cookie
import cookielib
import httplib
from cStringIO import StringIO
from . import strformat
# Pre-bound join methods: no separator, "; " separator, single-space separator.
_nulljoin = ''.join
_semispacejoin = '; '.join
_spacejoin = ' '.join
class CookieError (StandardError):
    """Raised when cookie syntax is invalid or when cookie values are
    conflicting or impossible."""
# Characters that never force a cookie value to be double-quoted.
_LegalChars = string.ascii_letters + string.digits + "!#$%&'*+-.^_`|~:"

# Escape table applied character by character when a value is quoted.
# Control characters (0-31), DEL (127) and the whole high half (128-255)
# are written as a backslash followed by their three-digit octal code.
_Translator = dict(
    (chr(_code), '\\%03o' % _code)
    for _code in list(range(32)) + list(range(127, 256))
)
_Translator.update({
    # Because of the way browsers really handle cookies (as opposed
    # to what the RFC says) we also encode , and ;
    ',': '\\054',
    ';': '\\073',
    # The quote and the escape character themselves get a backslash prefix.
    '"': '\\"',
    '\\': '\\\\',
})
def quote(str, LegalChars=_LegalChars):
    r"""Return ``str`` in a form usable as a cookie header value.

    A string made up only of characters from ``LegalChars`` is returned
    unchanged. Any other string is wrapped in double quotes, with each
    character that has an entry in the escape table replaced by its
    escaped form (special characters are quoted with a \).
    """
    needs_quoting = any(ch not in LegalChars for ch in str)
    if not needs_quoting:
        return str
    escaped = _nulljoin(_Translator.get(ch, ch) for ch in str)
    return '"' + escaped + '"'
_OctalPatt = re.compile(r"\\[0-3][0-7][0-7]")
_QuotePatt = re.compile(r"[\\].")
# Both escape forms in one pattern. The octal alternative comes first so
# that "\012" is decoded as one character rather than as an escaped "0".
_UnquotePatt = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))")


def _unquote_replace(match):
    """Return the single character encoded by one backslash escape."""
    octal = match.group(1)
    if octal:
        return chr(int(octal, 8))
    return match.group(2)


def unquote(str):
    r"""Undo the quoting applied to a cookie value.

    A value that is ``None``, shorter than two characters, or not enclosed
    in double quotes is returned unchanged: without the surrounding quotes
    there can be no special characters (see RFC 2109). Otherwise the
    quotes are removed and the escapes are decoded, for example:

        \012 --> \n
        \"   --> "

    A backslash that is not followed by a matchable character (for example
    one at the very end of the value) is kept as it is.

    The decoding is done in a single regular-expression pass. The earlier
    loop ran two searches per escape, each of which could scan the rest of
    the string, so a value made of many escapes took quadratic time.
    """
    if str is None or len(str) < 2:
        return str
    if str[0] != '"' or str[-1] != '"':
        return str
    # Strip the enclosing quotes and decode every escape in one pass.
    return _UnquotePatt.sub(_unquote_replace, str[1:-1])
unquote = Cookie._unquote
quote = Cookie._quote
has_embedded_dot = re.compile(r"[a-zA-Z0-9]\.[a-zA-Z]").search
# Pattern for finding cookies, taken from Python's Cookie.py.
# Modification: allow whitespace in values.
LegalChars = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
CookiePattern = re.compile(r"""
(?P<key> # Start of group 'key'
[%(legalchars)s]+? # Any word of at least one letter, nongreedy
) # End of group 'key'
\s*=\s* # Equal Sign
(?P<val> # Start of group 'val'
"(?:[^\\"]|\\.)*" # Any doublequoted string
| # or
[%(legalchars)s\s]* # Any word or empty string
) # End of group 'val'
\s*;? # Probably ending in a semi-colon
""" % {"legalchars": LegalChars}, re.VERBOSE)
# Character class accepted in cookie keys and in unquoted cookie values.
_LegalCharsPatt = r"[\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=]"
# Matches one "key", "key=value" or "key=<expires date>" item of a cookie
# header. The value group is optional, so "val" is None for bare
# attributes. Verbose mode is requested through the flags argument rather
# than an inline "(?x)": an inline global flag that is not at the very
# start of the pattern is rejected by newer Python versions, while the
# flags argument matches identically everywhere.
_CookiePattern = re.compile(r"""
    (?P<key>                       # Start of group 'key'
    """ + _LegalCharsPatt + r"""+?   # Any word of at least one letter
    )                              # End of group 'key'
    (                              # Optional group: there may not be a value.
    \s*=\s*                          # Equal Sign
    (?P<val>                         # Start of group 'val'
    "(?:[^\\"]|\\.)*"                  # Any doublequoted string
    |                                  # or
    \w{3},\s[\w\d\s-]{9,11}\s[\d:]{8}\sGMT  # Special case for "expires" attr
    |                                  # or
    """ + _LegalCharsPatt + r"""*      # Any word or empty string
    )                                # End of group 'val'
    )?                             # End of optional value group
    \s*                            # Any number of spaces.
    (\s+|;|$)                      # Ending either at space, semicolon, or EOS.
    """, re.VERBOSE)
class HttpCookie (object):
"""A cookie consists of one name-value pair with attributes.
@ -84,6 +216,8 @@ class HttpCookie (object):
"commenturl": "CommentURL",
"discard": "Discard",
"port": "Port",
# httponly to protect against XSS attacks
"httponly": "httponly",
}
def __init__ (self, name, value, attributes=None):
@ -203,7 +337,10 @@ class HttpCookie (object):
key = key.lower()
if key not in self.attribute_names:
raise CookieError("invalid attribute %r" % key)
value = unquote(value)
if value:
value = unquote(value)
else:
value = ""
if key == "domain":
value = value.lower()
if not value.startswith(".") and not has_embedded_dot(value):
@ -227,7 +364,7 @@ class HttpCookie (object):
raise CookieError("invalid port number: %r" % port)
self.attributes[key] = value
def parse (self, text, patt=CookiePattern):
def parse (self, text, patt=_CookiePattern):
"""Parse cookie data."""
text = strformat.ascii_safe(text.rstrip('\r\n'))
# reset values
@ -246,6 +383,8 @@ class HttpCookie (object):
# No more key-value pairs.
break
key, value = match.group("key"), match.group("val")
if value is None:
value = ""
i = match.end()
# Parse the key, value in case it's metainfo.
if self.name is None:
@ -430,6 +569,7 @@ def cookie_str(cookie):
#if cookie.port_specified: h.append(("port_spec", None))
#if cookie.domain_initial_dot: h.append(("domain_dot", None))
if cookie.secure: h.append(("secure", None))
if cookie.httponly: h.append(("httponly", None))
if cookie.expires: h.append(("expires",
time2isoz(float(cookie.expires))))
if cookie.discard: h.append(("discard", None))

View file

@ -39,33 +39,36 @@ class TestCookieJar (unittest.TestCase):
jar = linkcheck.cache.cookie.CookieJar()
data = (
("Foo", "Bar"),
("Domain", "example.org"),
("Domain", host),
("Path", "/"),
)
value = "; ".join('%s=%s' % (key, value) for key, value in data)
headers = get_headers('Set-Cookie', value)
jar.add(headers, scheme, host, path)
errors = jar.add(headers, scheme, host, path)
self.assertFalse(errors, str(errors))
self.assertEqual(len(jar.cache), 1)
# add updated cookie
data = (
("FOO", "Baz"),
("Domain", "example.org"),
("Domain", host),
("Path", "/"),
)
value = "; ".join('%s=%s' % (key, value) for key, value in data)
headers = get_headers('Set-Cookie', value)
jar.add(headers, scheme, host, path)
errors = jar.add(headers, scheme, host, path)
self.assertFalse(errors, str(errors))
self.assertEqual(len(jar.cache), 1)
# remove cookie
data = (
("FOO", "Baz"),
("Domain", "example.org"),
("Domain", host),
("Path", "/"),
("Max-Age", "0"),
)
value = "; ".join('%s=%s' % (key, value) for key, value in data)
headers = get_headers('Set-Cookie', value)
jar.add(headers, scheme, host, path)
errors = jar.add(headers, scheme, host, path)
self.assertFalse(errors, str(errors))
self.assertEqual(len(jar.cache), 0)
def test_cookie_cache2 (self):
@ -75,31 +78,34 @@ class TestCookieJar (unittest.TestCase):
jar = linkcheck.cache.cookie.CookieJar()
data = (
("Foo", "Bar"),
("Domain", "example.org"),
("Domain", host),
("Path", "/"),
)
value = "; ".join('%s=%s' % (key, value) for key, value in data)
headers = get_headers('Set-Cookie2', value)
jar.add(headers, scheme, host, path)
errors = jar.add(headers, scheme, host, path)
self.assertFalse(errors, str(errors))
self.assertEqual(len(jar.cache), 1)
# add updated cookie
data = (
("Foo", "Baz"),
("Domain", "EXAMPLE.org"),
("Domain", host.upper()),
("Path", "/"),
)
value = "; ".join('%s=%s' % (key, value) for key, value in data)
headers = get_headers('Set-Cookie2', value)
jar.add(headers, scheme, host, path)
errors = jar.add(headers, scheme, host, path)
self.assertFalse(errors, str(errors))
self.assertEqual(len(jar.cache), 1)
# remove cookie
data = (
("FOO", "Baz"),
("Domain", "example.org"),
("Domain", host),
("Path", "/"),
("Max-Age", "0"),
)
value = "; ".join('%s=%s' % (key, value) for key, value in data)
headers = get_headers('Set-Cookie2', value)
jar.add(headers, scheme, host, path)
errors = jar.add(headers, scheme, host, path)
self.assertFalse(errors, str(errors))
self.assertEqual(len(jar.cache), 0)

View file

@ -58,8 +58,9 @@ class TestCookies (unittest.TestCase):
self.assertTrue(cookie.is_expired())
def test_netscape_cookie3 (self):
# invalid port
data = (
("Foo", "Bar\""),
("Foo", "Bar"),
("Port", "hul,la"),
)
value = "; ".join('%s="%s"' % (key, value) for key, value in data)
@ -71,7 +72,7 @@ class TestCookies (unittest.TestCase):
def test_netscape_cookie4 (self):
data = (
("Foo", "Bar\""),
("Foo", "Bar"),
("Domain", "localhost"),
("Port", "100,555,76"),
)
@ -200,8 +201,9 @@ class TestCookies (unittest.TestCase):
self.assertTrue(cookie.is_expired())
def test_rfc_cookie3 (self):
# invalid port
data = (
("Foo", "Bar\""),
("Foo", "Bar"),
("Port", "hul,la"),
)
value = "; ".join('%s="%s"' % (key, value) for key, value in data)
@ -213,7 +215,7 @@ class TestCookies (unittest.TestCase):
def test_rfc_cookie4 (self):
data = (
("Foo", "Bar\""),
("Foo", "Bar"),
("Port", "100,555,76"),
)
value = "; ".join('%s="%s"' % (key, value) for key, value in data)