linkchecker/linkcheck/cookies.py
2009-01-08 14:18:03 +00:00

355 lines
12 KiB
Python

# -*- coding: iso-8859-1 -*-
# Copyright (C) 2005-2009 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
"""
Parsing and storing of cookies. See RFC 2965 [1] and RFC 2109 [2].
The reason for this module is that neither the cookielib nor the Cookie
modules included in the Python standard library provide a usable interface
for programmable cookie handling.
This module provides parsing of cookies for all formats specified by
the above RFCs, plus smart methods handling data conversion and formatting.
And a cookie storage class is provided.
[1] http://www.faqs.org/rfcs/rfc2965.html
[2] http://www.faqs.org/rfcs/rfc2109.html
"""
from __future__ import with_statement
import time
import re
import Cookie
import cookielib
from cStringIO import StringIO
import rfc822
from . import strformat
class CookieError (StandardError):
"""Thrown for invalid cookie syntax or conflicting/impossible values."""
pass
# Reuse the (private) quoting helpers of the standard Cookie module.
unquote = Cookie._unquote
quote = Cookie._quote
# Search function that finds a dot sitting between an alphanumeric
# character and a letter, i.e. a dot embedded inside a name.
has_embedded_dot = re.compile(r"[a-zA-Z0-9]\.[a-zA-Z]").search
# Pattern for finding cookies, adapted from Python's Cookie.py.
# Modification: allow whitespace in values.
# LegalChars is the body of a regex character class; it is interpolated
# into CookiePattern below for both keys and unquoted values.
LegalChars = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
# Matches one "key = value" pair, optionally followed by a semicolon.
# The value is either a double-quoted string or a (possibly empty) run
# of legal characters and whitespace.
CookiePattern = re.compile(r"""
(?P<key> # Start of group 'key'
[%(legalchars)s]+? # Any word of at least one letter, nongreedy
) # End of group 'key'
\s*=\s* # Equal Sign
(?P<val> # Start of group 'val'
"(?:[^\\"]|\\.)*" # Any doublequoted string
| # or
[%(legalchars)s\s]* # Any word or empty string
) # End of group 'val'
\s*;? # Probably ending in a semi-colon
""" % {"legalchars": LegalChars}, re.VERBOSE)
class HttpCookie (object):
    """A cookie consists of one name-value pair with attributes.

    Each attribute consists of a predefined name (see attribute_names)
    and a value (which is optional for some attributes).
    """

    # A mapping from the lowercase variant on the left to the
    # appropriate traditional formatting on the right.
    attribute_names = {
        # Old Netscape attribute
        "expires": "expires",
        # Defined by RFC 2109
        "path": "Path",
        "comment": "Comment",
        "domain": "Domain",
        "max-age": "Max-Age",
        "secure": "secure",
        "version": "Version",
        # Additional attributes defined by RFC 2965
        "commenturl": "CommentURL",
        "discard": "Discard",
        "port": "Port",
    }

    def __init__ (self, name, value, attributes=None):
        """Store the cookie data and calculate the expiration time.

        @param name: the cookie name
        @param value: the cookie value
        @param attributes: mapping of lowercase attribute names to their
          values; stored as-is (not copied). An empty dict is used if None.
        """
        self.name = name
        self.value = value
        if attributes is None:
            self.attributes = {}
        else:
            self.attributes = attributes
        self.calculate_expiration()

    def calculate_expiration (self):
        """Set self.expire from the "max-age" or "expires" attribute.

        self.expire is a timestamp comparable to time.time(), or None if
        the cookie does not expire. "max-age" takes precedence over
        "expires".
        """
        now = time.time()
        # default: does not expire
        self.expire = None
        if "max-age" in self.attributes:
            try:
                maxage = int(self.attributes["max-age"])
                if maxage == 0:
                    # Expire immediately: subtract 1 to be sure since
                    # some clocks have only full second precision.
                    self.expire = now - 1
                else:
                    self.expire = now + maxage
            except (ValueError, OverflowError):
                # note: even now + maxage can overflow; in that case the
                # cookie is treated as not expiring
                pass
        elif "expires" in self.attributes:
            # http2time() gives None for unparseable dates, which leaves
            # the cookie non-expiring.
            self.expire = cookielib.http2time(self.attributes["expires"])

    def is_expired (self, now=None):
        """Return True if the cookie expiration time lies before `now`.

        @param now: timestamp to compare against; defaults to time.time()
        """
        if self.expire is None:
            # Does not expire.
            return False
        if now is None:
            now = time.time()
        return now > self.expire

    def __repr__ (self):
        """Return a debug representation with name, value and attributes."""
        attrs = "; ".join("%s=%r" % (k, v)
                          for k, v in self.attributes.items())
        return "<%s %s=%r; %s>" % (self.__class__.__name__,
                                   self.name, self.value, attrs)

    def is_valid_for (self, scheme, host, port, path):
        """Check validity of this cookie against the desired scheme,
        host, port and path.

        @return: True only if the cookie is not expired and all of the
          domain, port, path and secure checks succeed.
        """
        if (self.check_expired() and
            self.check_domain(host) and
            self.check_port(port) and
            self.check_path(path) and
            self.check_secure(scheme)):
            return True
        return False

    def check_expired (self):
        """Return True if the cookie is still usable, i.e. NOT expired."""
        return not self.is_expired()

    def check_domain (self, domain):
        """Return True if the host name `domain` is covered by the
        "domain" attribute of this cookie.

        A host matches if it equals the cookie domain, or if it consists
        of a single dot-free label followed by the cookie domain.
        The comparison is case-insensitive. A cookie without a "domain"
        attribute matches nothing.
        """
        if "domain" not in self.attributes:
            return False
        # Host names are case-insensitive, so compare lowercase variants.
        cdomain = self.attributes["domain"].lower()
        domain = domain.lower()
        if domain == cdomain:
            # equality matches
            return True
        if "." not in domain and cdomain == "." + domain:
            # "localhost" and ".localhost" match
            return True
        # Only match on a label boundary: the host "badexample.com" must
        # not be accepted for the cookie domain "example.com".
        if cdomain.startswith("."):
            suffix = cdomain
        else:
            suffix = "." + cdomain
        if not domain.endswith(suffix):
            return False
        # the host part in front of the cookie domain must be dot-free
        return "." not in domain[:-len(suffix)]

    def check_port (self, port):
        """Return True if the cookie may be sent to `port`. This base
        implementation accepts every port."""
        return True

    def check_path (self, path):
        """Return True if `path` starts with the "path" attribute of this
        cookie. A cookie without a "path" attribute matches nothing."""
        if "path" not in self.attributes:
            return False
        return path.startswith(self.attributes["path"])

    def check_secure (self, scheme):
        """Return False only if the cookie has the "secure" attribute and
        `scheme` is not "https"."""
        if "secure" in self.attributes:
            return scheme == "https"
        return True

    def client_header_name (self):
        """Return the name of the request header carrying cookies."""
        return "Cookie"

    def set_attribute (self, key, value):
        """Validate and store one cookie attribute.

        @param key: attribute name, matched case-insensitively against
          attribute_names
        @param value: raw (possibly quoted) attribute value
        @raises: CookieError if no NAME=VALUE pair has been parsed yet,
          the attribute name is unknown, or a domain, Max-Age or port
          value is invalid
        """
        if self.attributes is None:
            raise CookieError("no NAME=VALUE before attributes found")
        key = key.lower()
        if key not in self.attribute_names:
            raise CookieError("invalid attribute %r" % key)
        value = unquote(value)
        if key == "domain":
            value = value.lower()
            if not value.startswith(".") and not has_embedded_dot(value):
                if "." in value:
                    raise CookieError("invalid dot in domain %r" % value)
                # supply a leading dot
                value = "."+value
        elif key == "max-age":
            try:
                if int(value) < 0:
                    raise ValueError("Negative Max-Age")
            except (OverflowError, ValueError):
                raise CookieError("invalid Max-Age number: %r" % value)
        elif key == "port":
            # the value is a comma-separated list of port numbers
            for port in value.split(","):
                try:
                    if not (0 <= int(port) <= 65535):
                        raise ValueError("Invalid port number")
                except (OverflowError, ValueError):
                    raise CookieError("invalid port number: %r" % port)
        self.attributes[key] = value

    def parse (self, text, patt=CookiePattern):
        """Parse cookie data, replacing name, value and attributes.

        The first key-value pair found is the cookie name and value; all
        following pairs are attributes (a leading "$" is stripped from
        attribute names).

        @param text: the cookie header value
        @param patt: compiled pattern with groups 'key' and 'val'
        @raises: CookieError if no NAME=VALUE pair is found or an
          attribute is invalid
        """
        text = strformat.ascii_safe(text)
        # reset values
        self.name = None
        self.value = None
        self.attributes = None
        # Our starting point
        i = 0
        # Length of string
        n = len(text)
        while 0 <= i < n:
            # Start looking for a key-value pair.
            match = patt.search(text, i)
            if not match:
                # No more key-value pairs.
                break
            key, value = match.group("key"), match.group("val")
            i = match.end()
            # Parse the key, value in case it's metainfo.
            if self.name is None:
                # Set name and value.
                self.name = key
                self.value = unquote(value)
                self.attributes = {}
            else:
                if key.startswith("$"):
                    key = key[1:]
                self.set_attribute(key, value)
        if self.name is None:
            # Without this check self.attributes would still be None and
            # calculate_expiration() would fail with a TypeError.
            raise CookieError("missing cookie name in %r" % text)
        self.calculate_expiration()

    def set_default_attributes (self, scheme, host, path):
        """Fill in missing "domain" and "path" attributes from the request
        data and verify that the cookie is valid for that request.

        A missing domain defaults to the lowercase host. A missing path
        defaults to the request path up to (excluding) its last slash,
        or "/" if that is empty.

        @raises: CookieError if the cookie domain, path or secure
          attribute does not fit the given host, path or scheme
        """
        scheme = strformat.ascii_safe(scheme)
        host = strformat.ascii_safe(host)
        path = strformat.ascii_safe(path)
        if "domain" not in self.attributes:
            self.attributes["domain"] = host.lower()
        if "path" not in self.attributes:
            i = path.rfind("/")
            if i == -1:
                path = "/"
            else:
                path = path[:i]
                if not path:
                    path = "/"
            self.attributes["path"] = path
        if not self.check_domain(host):
            cdomain = self.attributes["domain"]
            raise CookieError("domain %r not for cookie %r" % (cdomain, host))
        if not self.check_path(path):
            cpath = self.attributes["path"]
            raise CookieError("path %r not for cookie %r" % (cpath, path))
        if not self.check_secure(scheme):
            raise CookieError("no secure scheme %r" % scheme)

    def quote (self, key, value):
        """Return the quoted form of an attribute value. The key is
        ignored here; subclasses may quote depending on the attribute."""
        return quote(value)

    def server_header_value (self):
        """Return the cookie formatted as a server header value:
        NAME=VALUE followed by all attributes, joined with "; "."""
        parts = ["%s=%s" % (self.name, quote(self.value))]
        for k, v in self.attributes.items():
            parts.append("%s=%s" % (self.attribute_names[k], self.quote(k, v)))
        return "; ".join(parts)

    def client_header_value (self):
        """Return the cookie formatted as a client header value.

        A "version" attribute is put first as $Version; all other
        attributes follow NAME=VALUE with a "$" prefix.
        """
        parts = []
        if "version" in self.attributes:
            parts.append("$Version=%s" % quote(self.attributes["version"]))
        parts.append("%s=%s" % (self.name, quote(self.value)))
        for k, v in self.attributes.items():
            if k != "version":
                parts.append("$%s=%s" %
                             (self.attribute_names[k], self.quote(k, v)))
        return "; ".join(parts)
class NetscapeCookie (HttpCookie):
    """Cookie in RFC 2109 (Netscape) format, built from header text."""

    def __init__ (self, text, scheme, host, path):
        """Parse the cookie text, then complete and verify its attributes
        against the scheme, host and path of the request."""
        self.parse(text)
        self.set_default_attributes(scheme, host, path)

    def server_header_name (self):
        """Return the name of the response header setting this cookie."""
        return "Set-Cookie"
class Rfc2965Cookie (HttpCookie):
    """Cookie in RFC 2965 format, built from header text."""

    def __init__ (self, text, scheme, host, path):
        """Parse the cookie text, then complete and verify its attributes
        against the scheme, host and path of the request."""
        self.parse(text)
        self.set_default_attributes(scheme, host, path)

    def check_port (self, port):
        """Return True if there is no "port" attribute or if `port` is
        one of the numbers in its comma-separated list."""
        if "port" not in self.attributes:
            return True
        allowed_ports = [int(entry)
                         for entry in self.attributes["port"].split(",")]
        return port in allowed_ports

    def server_header_name (self):
        """Return the name of the response header setting this cookie."""
        return "Set-Cookie2"

    def quote (self, key, value):
        """Return the quoted form of an attribute value. The "port"
        value is quoted with an empty set of legal characters
        (presumably so the port list is always put in double quotes)."""
        if key != "port":
            return quote(value)
        return quote(value, LegalChars="")
# XXX more methods (equality test)
def from_file (filename):
    """Parse cookie data from a text file in HTTP header format.

    The file holds one or more header blocks separated by blank lines;
    each block is handed to from_headers().

    @return: list of tuples (headers, scheme, host, path)
    """
    entries = []
    pending = []
    with open(filename) as fd:
        for raw in fd:
            stripped = raw.rstrip()
            if stripped:
                pending.append(stripped)
            elif pending:
                # a blank line ends the current header block
                entries.append(from_headers("\r\n".join(pending)))
                pending = []
        if pending:
            # last block without a trailing blank line
            entries.append(from_headers("\r\n".join(pending)))
    return entries
def from_headers (strheader):
"""Parse cookie data from a string in HTTP header (RFC 822) format.
@return: tuple (headers, scheme, host, path)
@raises: ValueError for incomplete or invalid data
"""
fp = StringIO(strheader)
headers = rfc822.Message(fp, seekable=True)
if "Host" not in headers:
raise ValueError("Required header 'Host:' missing")
host = headers["Host"]
scheme = headers.get("Scheme", "http")
path= headers.get("Path", "/")
return (headers, scheme, host, path)