From 977d9e9ae6b120ff2c698d64a7be33cde258c569 Mon Sep 17 00:00:00 2001 From: Bastian Kleineidam Date: Mon, 1 Aug 2011 20:26:31 +0200 Subject: [PATCH] Update cookie values instead of adding duplicate entries. --- doc/changelog.txt | 2 + linkcheck/cache/cookie.py | 20 +++++-- linkcheck/cookies.py | 40 +++++++++++-- tests/cache/__init__.py | 16 ++++++ tests/cache/test_cookiejar.py | 105 ++++++++++++++++++++++++++++++++++ tests/test_cookies.py | 60 +++++++++++++++++++ 6 files changed, 231 insertions(+), 12 deletions(-) create mode 100644 tests/cache/__init__.py create mode 100644 tests/cache/test_cookiejar.py diff --git a/doc/changelog.txt b/doc/changelog.txt index 0ee06ba5..7dc23a20 100644 --- a/doc/changelog.txt +++ b/doc/changelog.txt @@ -13,6 +13,8 @@ Fixes: authentication. Closes: SF bug #3377193 - checking: Ignore attribute errors when printing the Qt version. +- checking: Update cookie values instead of adding duplicate entries. + Closes: SF bug #3373910 Changes: - gui: Default to last URL checked in GUI (if no URL is given as diff --git a/linkcheck/cache/cookie.py b/linkcheck/cache/cookie.py index 7a69f9db..f92217f1 100644 --- a/linkcheck/cache/cookie.py +++ b/linkcheck/cache/cookie.py @@ -43,17 +43,25 @@ class CookieJar (object): for h in headers.getallmatchingheaders("Set-Cookie"): # RFC 2109 (Netscape) cookie type try: - jar.add(cookies.NetscapeCookie(h, scheme, host, path)) - except cookies.CookieError: + cookie = cookies.NetscapeCookie(h, scheme, host, path) + if cookie in jar: + jar.remove(cookie) + if not cookie.is_expired(): + jar.add(cookie) + except cookies.CookieError, msg: log.debug(LOG_CACHE, - "Invalid cookie header for %s:%s%s: %r", scheme, host, path, h) + "Invalid cookie %r for %s:%s%s: %s", h, scheme, host, path, msg) for h in headers.getallmatchingheaders("Set-Cookie2"): # RFC 2965 cookie type try: - jar.add(cookies.Rfc2965Cookie(h, scheme, host, path)) - except cookies.CookieError: + cookie = cookies.Rfc2965Cookie(h, scheme, host, path) + if cookie in jar: + jar.remove(cookie) + if not cookie.is_expired(): + jar.add(cookie) + except cookies.CookieError, msg: log.debug(LOG_CACHE, - "Invalid cookie2 header for %s:%s%s: %r", scheme, host, path, h) + "Invalid cookie2 %r for %s:%s%s: %s", h, scheme, host, path, msg) self.cache[host] = jar return jar diff --git a/linkcheck/cookies.py b/linkcheck/cookies.py index a37fb669..35e1b24c 100644 --- a/linkcheck/cookies.py +++ b/linkcheck/cookies.py @@ -305,12 +305,6 @@ class HttpCookie (object): for k, v in self.attributes.items() if k != "version"]) return "; ".join(parts) - def __eq__ (self, other): - """Compare equality of cookie.""" - return isinstance(other, HttpCookie) and \ - self.server_header_value() == other.server_header_value() - - class NetscapeCookie (HttpCookie): """Parses RFC 2109 (Netscape) cookies.""" @@ -323,6 +317,23 @@ class NetscapeCookie (HttpCookie): """Return "Set-Cookie" as server header name.""" return "Set-Cookie" + def __eq__ (self, other): + """Compare equality of cookie.""" + return (isinstance(other, NetscapeCookie) and + self.name.lower() == other.name.lower() and + self.attributes['domain'] == other.attributes['domain'] and + self.attributes['path'] == other.attributes['path']) + + def __hash__ (self): + """Cookie hash value""" + data = ( + self.name.lower(), + self.attributes['domain'], + self.attributes['path'], + ) + return hash(data) + + class Rfc2965Cookie (HttpCookie): """Parses RFC 2965 cookies.""" @@ -349,6 +360,23 @@ class Rfc2965Cookie (HttpCookie): return quote(value, LegalChars="") return quote(value) + def __eq__ (self, other): + """Compare equality of cookie.""" + return (isinstance(other, Rfc2965Cookie) and + self.name.lower() == other.name.lower() and + self.attributes['domain'].lower() == + other.attributes['domain'].lower() and + self.attributes['path'] == other.attributes['path']) + + def __hash__ (self): + """Cookie hash value""" + data = ( + self.name.lower(), + self.attributes['domain'].lower(), + self.attributes['path'], + ) + return hash(data) + def from_file (filename): """Parse cookie data from a text file in HTTP header format. diff --git a/tests/cache/__init__.py b/tests/cache/__init__.py new file mode 100644 index 00000000..067bb316 --- /dev/null +++ b/tests/cache/__init__.py @@ -0,0 +1,16 @@ +# -*- coding: iso-8859-1 -*- +# Copyright (C) 2011 Bastian Kleineidam +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. diff --git a/tests/cache/test_cookiejar.py b/tests/cache/test_cookiejar.py new file mode 100644 index 00000000..b93ac90e --- /dev/null +++ b/tests/cache/test_cookiejar.py @@ -0,0 +1,105 @@ +# -*- coding: iso-8859-1 -*- +# Copyright (C) 2011 Bastian Kleineidam +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. +""" +Test cookie jar caching routines. +""" + +import unittest +import httplib +from StringIO import StringIO +import linkcheck.cache.cookie + +def get_headers (name, value): + """Return HTTP header object with given name and value.""" + data = "%s: %s" % (name, value) + return httplib.HTTPMessage(StringIO(data)) + + +class TestCookieJar (unittest.TestCase): + """Test cookie jar routines.""" + + def test_cookie_cache1 (self): + scheme = "http" + host = "example.org" + path = "/" + jar = linkcheck.cache.cookie.CookieJar() + data = ( + ("Foo", "Bar"), + ("Domain", "example.org"), + ("Path", "/"), + ) + value = "; ".join('%s=%s' % (key, value) for key, value in data) + headers = get_headers('Set-Cookie', value) + jar.add(headers, scheme, host, path) + self.assertEqual(len(jar.cache[host]), 1) + # add updated cookie + data = ( + ("FOO", "Baz"), + ("Domain", "example.org"), + ("Path", "/"), + ) + value = "; ".join('%s=%s' % (key, value) for key, value in data) + headers = get_headers('Set-Cookie', value) + jar.add(headers, scheme, host, path) + self.assertEqual(len(jar.cache[host]), 1) + # remove cookie + data = ( + ("FOO", "Baz"), + ("Domain", "example.org"), + ("Path", "/"), + ("Max-Age", "0"), + ) + value = "; ".join('%s=%s' % (key, value) for key, value in data) + headers = get_headers('Set-Cookie', value) + jar.add(headers, scheme, host, path) + self.assertEqual(len(jar.cache[host]), 0) + + def test_cookie_cache2 (self): + scheme = "http" + host = "example.org" + path = "/" + jar = linkcheck.cache.cookie.CookieJar() + data = ( + ("Foo", "Bar"), + ("Domain", "example.org"), + ("Path", "/"), + ) + value = "; ".join('%s=%s' % (key, value) for key, value in data) + headers = get_headers('Set-Cookie2', value) + jar.add(headers, scheme, host, path) + self.assertEqual(len(jar.cache[host]), 1) + # add updated cookie + data = ( + ("Foo", "Baz"), + ("Domain", "EXAMPLE.org"), + ("Path", "/"), + ) + value = "; ".join('%s=%s' % (key, value) for key, value in data) + headers = get_headers('Set-Cookie2', value) + jar.add(headers, scheme, host, path) + self.assertEqual(len(jar.cache[host]), 1) + # remove cookie + data = ( + ("FOO", "Baz"), + ("Domain", "example.org"), + ("Path", "/"), + ("Max-Age", "0"), + ) + value = "; ".join('%s=%s' % (key, value) for key, value in data) + headers = get_headers('Set-Cookie2', value) + jar.add(headers, scheme, host, path) + self.assertEqual(len(jar.cache[host]), 0) diff --git a/tests/test_cookies.py b/tests/test_cookies.py index 915a9af5..0443049c 100644 --- a/tests/test_cookies.py +++ b/tests/test_cookies.py @@ -113,6 +113,36 @@ class TestCookies (unittest.TestCase): self.assertTrue(cookie.is_valid_for("http", "www.example.org", 80, "/")) self.assertFalse(cookie.is_valid_for("http", "www.b.example.org", 80, "/")) + def test_netscape_cookie7 (self): + data1 = ( + ("Foo", "Bar"), + ("Domain", "example.org"), + ("Path", "/"), + ) + data2 = ( + ("FOO", "Baz"), + ("Domain", "example.org"), + ("Path", "/"), + ) + data3 = ( + ("FOOl", "Baz"), + ("Domain", "example.org"), + ("Path", "/"), + ) + # note: values are without quotes + value1 = "; ".join('%s=%s' % (key, value) for key, value in data1) + value2 = "; ".join('%s=%s' % (key, value) for key, value in data2) + value3 = "; ".join('%s=%s' % (key, value) for key, value in data3) + scheme = "http" + host = "example.org" + path = "/" + cookie1 = linkcheck.cookies.NetscapeCookie(value1, scheme, host, path) + cookie2 = linkcheck.cookies.NetscapeCookie(value2, scheme, host, path) + cookie3 = linkcheck.cookies.NetscapeCookie(value3, scheme, host, path) + self.assertEqual(cookie1, cookie2) + self.assertNotEqual(cookie1, cookie3) + self.assertNotEqual(cookie2, cookie3) + def test_rfc_cookie1 (self): data = ( ("Foo", "Bar"), @@ -169,6 +199,36 @@ class TestCookies (unittest.TestCase): cookie = linkcheck.cookies.Rfc2965Cookie(value, scheme, host, path) self.assertTrue(cookie.is_valid_for("http", host, 100, "/")) + def test_rfc_cookie5 (self): + data1 = ( + ("Foo", "Bar"), + ("Domain", "example.org"), + ("Path", "/"), + ) + data2 = ( + ("FOO", "Baz"), + ("Domain", "EXAMPLE.org"), + ("Path", "/"), + ) + data3 = ( + ("FOOl", "Baz"), + ("Domain", "EXAMPLE.org"), + ("Path", "/"), + ) + # note: values are without quotes + value1 = "; ".join('%s=%s' % (key, value) for key, value in data1) + value2 = "; ".join('%s=%s' % (key, value) for key, value in data2) + value3 = "; ".join('%s=%s' % (key, value) for key, value in data3) + scheme = "http" + host = "example.org" + path = "/" + cookie1 = linkcheck.cookies.Rfc2965Cookie(value1, scheme, host, path) + cookie2 = linkcheck.cookies.Rfc2965Cookie(value2, scheme, host, path) + cookie3 = linkcheck.cookies.Rfc2965Cookie(value3, scheme, host, path) + self.assertEqual(cookie1, cookie2) + self.assertNotEqual(cookie1, cookie3) + self.assertNotEqual(cookie2, cookie3) + def test_cookie_parse1 (self): lines = [ 'Host: example.org',