mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-21 00:10:24 +00:00
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@3887 e7d03fd6-7b0d-0410-9947-9c21f3af8025
355 lines
12 KiB
Python
355 lines
12 KiB
Python
# -*- coding: iso-8859-1 -*-
|
|
# Copyright (C) 2005-2009 Bastian Kleineidam
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License
|
|
# along with this program; if not, write to the Free Software
|
|
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
"""
|
|
Parsing and storing of cookies. See [1]RFC 2965 and [2]RFC 2109.
|
|
The reason for this module is that neither the cookielib nor the Cookie
|
|
modules included in the Python standard library provide a usable interface
|
|
for programmable cookie handling.
|
|
This module provides parsing of cookies for all formats specified by
|
|
the above RFCs, plus smart methods handling data conversion and formatting.
|
|
A cookie storage class is also provided.
|
|
|
|
[1] http://www.faqs.org/rfcs/rfc2965.html
|
|
[2] http://www.faqs.org/rfcs/rfc2109.html
|
|
"""
|
|
|
|
from __future__ import with_statement
|
|
import time
|
|
import re
|
|
import Cookie
|
|
import cookielib
|
|
from cStringIO import StringIO
|
|
import rfc822
|
|
from . import strformat
|
|
|
|
|
|
class CookieError (StandardError):
|
|
"""Thrown for invalid cookie syntax or conflicting/impossible values."""
|
|
pass
|
|
|
|
|
|
unquote = Cookie._unquote
|
|
quote = Cookie._quote
|
|
has_embedded_dot = re.compile(r"[a-zA-Z0-9]\.[a-zA-Z]").search
|
|
|
|
|
|
# Pattern for finding cookies, taken from Python's Cookie.py.
# Modification: allow whitespace in values.
# LegalChars is the body of a regular expression character class (it is
# interpolated between "[" and "]" below) listing the characters accepted
# in cookie keys and in unquoted cookie values.
LegalChars = r"\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\="
# CookiePattern matches one "key=value" pair with an optional trailing
# semicolon. The named groups "key" and "val" hold the two sides; a
# double-quoted value is captured including its surrounding quotes.
CookiePattern = re.compile(r"""
(?P<key> # Start of group 'key'
[%(legalchars)s]+? # Any word of at least one letter, nongreedy
) # End of group 'key'
\s*=\s* # Equal Sign
(?P<val> # Start of group 'val'
"(?:[^\\"]|\\.)*" # Any doublequoted string
| # or
[%(legalchars)s\s]* # Any word or empty string
) # End of group 'val'
\s*;? # Probably ending in a semi-colon
""" % {"legalchars": LegalChars}, re.VERBOSE)
|
|
|
|
|
|
class HttpCookie (object):
    """A cookie consists of one name-value pair with attributes.

    Each attribute consists of a predefined name (see attribute_names)
    and a value (which is optional for some attributes).

    Instance attributes:
     - name: the cookie name
     - value: the (unquoted) cookie value
     - attributes: dict mapping lowercase attribute names to values
     - expire: expiration time in seconds since the epoch, or None if
       the cookie does not expire
    """

    # A mapping from the lowercase variant on the left to the
    # appropriate traditional formatting on the right.
    attribute_names = {
        # Old Netscape attribute
        "expires": "expires",
        # Defined by RFC 2109
        "path": "Path",
        "comment": "Comment",
        "domain": "Domain",
        "max-age": "Max-Age",
        "secure": "secure",
        "version": "Version",
        # Additional attributes defined by RFC 2965
        "commenturl": "CommentURL",
        "discard": "Discard",
        "port": "Port",
    }

    def __init__ (self, name, value, attributes=None):
        """Store cookie data and calculate the expiration time.

        @param name: cookie name
        @param value: cookie value
        @param attributes: dict of lowercase attribute names to values;
            a new empty dict is used if None is given
        """
        self.name = name
        self.value = value
        if attributes is None:
            self.attributes = {}
        else:
            self.attributes = attributes
        self.calculate_expiration()

    def calculate_expiration (self):
        """Set self.expire from the max-age or expires attribute.

        The max-age attribute takes precedence over expires. If neither
        is given, or the max-age value cannot be converted, self.expire
        is None which means the cookie does not expire. For expires the
        result of cookielib.http2time() is stored as is.
        """
        now = time.time()
        # default: does not expire
        self.expire = None
        if "max-age" in self.attributes:
            try:
                maxage = int(self.attributes["max-age"])
                if maxage == 0:
                    # Expire immediately: subtract 1 to be sure since
                    # some clocks have only full second precision.
                    self.expire = now - 1
                else:
                    self.expire = now + maxage
            except (ValueError, OverflowError):
                # Deliberately ignored: an unusable Max-Age leaves the
                # cookie non-expiring. Note that even now + maxage can
                # overflow.
                pass
        elif "expires" in self.attributes:
            self.expire = cookielib.http2time(self.attributes["expires"])

    def is_expired (self, now=None):
        """Return True if this cookie is expired at the given time.

        @param now: time in seconds since the epoch to check against;
            defaults to the current time
        """
        if self.expire is None:
            # Does not expire.
            return False
        if now is None:
            now = time.time()
        return now > self.expire

    def __repr__ (self):
        """Return class name, name=value pair and all attributes."""
        attrs = "; ".join("%s=%r"%(k, v) for k, v in self.attributes.items())
        return "<%s %s=%r; %s>" % (self.__class__.__name__,
            self.name, self.value, attrs)

    def is_valid_for (self, scheme, host, port, path):
        """Check validity of this cookie against the desired scheme,
        host, port and path. The cookie must also not be expired.

        @return: True if all checks pass, else False
        """
        return bool(self.check_expired() and
                    self.check_domain(host) and
                    self.check_port(port) and
                    self.check_path(path) and
                    self.check_secure(scheme))

    def check_expired (self):
        """Return True if this cookie is not expired."""
        return not self.is_expired()

    def check_domain (self, domain):
        """Return True if given domain matches this cookie, else False.

        Host names are compared case insensitively. A domain matches if
        it equals the cookie domain, or if it consists of a single
        dot-free label followed by the cookie domain. A cookie without
        a domain attribute matches nothing.
        """
        if "domain" not in self.attributes:
            return False
        # DNS names are case insensitive, so normalize both sides.
        domain = domain.lower()
        cdomain = self.attributes["domain"].lower()
        if domain == cdomain:
            # equality matches
            return True
        if "." not in domain and domain == cdomain[1:]:
            # "localhost" and ".localhost" match
            return True
        if not domain.endswith(cdomain):
            # only suffixes can match
            return False
        # Compute the part in front of the cookie domain. Do not use a
        # negative slice index since len(cdomain) could be zero.
        prefix = domain[:len(domain)-len(cdomain)]
        if not cdomain.startswith("."):
            # The suffix must start at a label boundary, else
            # "badexample.com" would match the cookie domain "example.com".
            if not prefix.endswith("."):
                return False
            prefix = prefix[:-1]
        if "." in prefix:
            # prefix must be dot-free
            return False
        return True

    def check_port (self, port):
        """Return True since ports are not restricted by this class."""
        return True

    def check_path (self, path):
        """Return True if given path starts with the cookie path.
        A cookie without a path attribute matches nothing."""
        if "path" not in self.attributes:
            return False
        return path.startswith(self.attributes["path"])

    def check_secure (self, scheme):
        """Return True if the cookie may be sent with given scheme.
        Cookies with the secure attribute require the "https" scheme."""
        if "secure" in self.attributes:
            return scheme == "https"
        return True

    def client_header_name (self):
        """Return the name of the header a client sends cookies with."""
        return "Cookie"

    def set_attribute (self, key, value):
        """Validate and store one cookie attribute.

        The key is lowercased and the value unquoted before storing.

        @raises: CookieError if no cookie name has been parsed yet, the
            attribute name is unknown, or a domain, max-age or port
            value is invalid
        """
        if self.attributes is None:
            raise CookieError("no NAME=VALUE before attributes found")
        key = key.lower()
        if key not in self.attribute_names:
            raise CookieError("invalid attribute %r" % key)
        value = unquote(value)
        if key == "domain":
            value = value.lower()
            if not value.startswith("."):
                if not has_embedded_dot(value):
                    if "." in value:
                        raise CookieError("invalid dot in domain %r" % value)
                    # supply a leading dot
                    value = "."+value
        if key == "max-age":
            try:
                num = int(value)
                if num < 0:
                    raise ValueError("Negative Max-Age")
            except (OverflowError, ValueError):
                raise CookieError("invalid Max-Age number: %r" % value)
        if key == "port":
            # the value is a comma-separated list of port numbers
            ports = value.split(",")
            for port in ports:
                try:
                    num = int(port)
                    if not (0 <= num <= 65535):
                        raise ValueError("Invalid port number")
                except (OverflowError, ValueError):
                    raise CookieError("invalid port number: %r" % port)
        self.attributes[key] = value

    def parse (self, text, patt=CookiePattern):
        """Parse cookie data, replacing name, value and attributes.

        The first key-value pair found is the cookie name and value, all
        following pairs are attributes. A leading "$" of attribute names
        is removed.

        @param text: the cookie header value
        @param patt: compiled pattern with groups "key" and "val" used
            to find the key-value pairs
        @raises: CookieError if no key-value pair is found or an
            attribute is invalid
        """
        text = strformat.ascii_safe(text)
        # reset values
        self.name = None
        self.value = None
        self.attributes = None
        # Our starting point
        i = 0
        # Length of string
        n = len(text)

        while 0 <= i < n:
            # Start looking for a key-value pair.
            match = patt.search(text, i)
            if not match:
                # No more key-value pairs.
                break
            key, value = match.group("key"), match.group("val")
            i = match.end()
            # Parse the key, value in case it's metainfo.
            if self.name is None:
                # Set name and value.
                self.name = key
                self.value = unquote(value)
                self.attributes = {}
            else:
                if key.startswith("$"):
                    key = key[1:]
                self.set_attribute(key, value)
        if self.name is None:
            # Without this check self.attributes would still be None and
            # calculate_expiration() would fail with a TypeError.
            raise CookieError("missing cookie name in %r" % text)
        self.calculate_expiration()

    def set_default_attributes (self, scheme, host, path):
        """Set domain and path attributes if not given, then check that
        the cookie is acceptable for the given scheme, host and path.

        The default domain is the lowercased host. The default path is
        the given path up to, but not including, the last slash, or "/"
        if that is empty.

        @raises: CookieError if domain, path or secure check fails
        """
        scheme = strformat.ascii_safe(scheme)
        host = strformat.ascii_safe(host)
        path = strformat.ascii_safe(path)
        if "domain" not in self.attributes:
            self.attributes["domain"] = host.lower()
        if "path" not in self.attributes:
            i = path.rfind("/")
            if i == -1:
                path = "/"
            else:
                path = path[:i]
                if not path:
                    path = "/"
            self.attributes["path"] = path
        if not self.check_domain(host):
            cdomain = self.attributes["domain"]
            raise CookieError("domain %r not for cookie %r" % (cdomain, host))
        if not self.check_path(path):
            cpath = self.attributes["path"]
            raise CookieError("path %r not for cookie %r" % (cpath, path))
        if not self.check_secure(scheme):
            raise CookieError("no secure scheme %r" % scheme)

    def quote (self, key, value):
        """Return the quoted attribute value for use in a header.
        The key is not used here; subclasses can use it to quote
        specific attributes differently."""
        return quote(value)

    def server_header_value (self):
        """Return the cookie formatted as a server header value: the
        name=value pair followed by all attributes, joined with "; "."""
        parts = ["%s=%s" % (self.name, quote(self.value))]
        parts.extend(["%s=%s"% (self.attribute_names[k], self.quote(k, v)) \
                    for k, v in self.attributes.items()])
        return "; ".join(parts)

    def client_header_value (self):
        """Return the cookie formatted as a client header value: an
        optional $Version, the name=value pair and all other attributes
        prefixed with "$", joined with "; "."""
        parts = []
        if "version" in self.attributes:
            parts.append("$Version=%s" % quote(self.attributes["version"]))
        parts.append("%s=%s" % (self.name, quote(self.value)))
        parts.extend(["$%s=%s"% (self.attribute_names[k], self.quote(k, v)) \
                  for k, v in self.attributes.items() if k != "version"])
        return "; ".join(parts)
|
|
|
|
|
|
class NetscapeCookie (HttpCookie):
    """Cookie in RFC 2109 (Netscape) format, as received in a
    "Set-Cookie" header."""

    def server_header_name (self):
        """Return the name of the server header carrying this cookie."""
        return "Set-Cookie"

    def __init__ (self, text, scheme, host, path):
        """Parse the cookie from text and fill in default attributes
        for the given scheme, host and path."""
        self.parse(text)
        self.set_default_attributes(scheme, host, path)
|
|
|
|
|
|
class Rfc2965Cookie (HttpCookie):
    """Cookie as received in a "Set-Cookie2" header. In addition to the
    base class checks it restricts the cookie to the ports listed in
    its port attribute."""

    def __init__ (self, text, scheme, host, path):
        """Parse the cookie from text and fill in default attributes
        for the given scheme, host and path."""
        self.parse(text)
        self.set_default_attributes(scheme, host, path)

    def server_header_name (self):
        """Return the name of the server header carrying this cookie."""
        return "Set-Cookie2"

    def check_port (self, port):
        """Return True if no port attribute is set or the given port is
        one of the comma-separated port numbers of the attribute."""
        try:
            cport = self.attributes["port"]
        except KeyError:
            return True
        allowed_ports = [int(number) for number in cport.split(",")]
        return port in allowed_ports

    def quote (self, key, value):
        """Return the quoted attribute value. The port value is quoted
        with an empty set of legal characters."""
        if key != "port":
            return quote(value)
        return quote(value, LegalChars="")
|
|
|
|
|
|
# XXX more methods (equality test)
|
|
|
|
|
|
def from_file (filename):
    """Read cookie data from a text file in HTTP header format.

    The file consists of header blocks separated by one or more blank
    lines; each block is handed to from_headers().

    @return: list of tuples (headers, scheme, host, path)
    """
    entries = []
    block = []
    with open(filename) as fd:
        for raw_line in fd:
            stripped = raw_line.rstrip()
            if stripped:
                block.append(stripped)
                continue
            # a blank line terminates the current header block
            if block:
                entries.append(from_headers("\r\n".join(block)))
            block = []
        # the last block needs no trailing blank line
        if block:
            entries.append(from_headers("\r\n".join(block)))
    return entries
|
|
|
|
|
|
def from_headers (strheader):
|
|
"""Parse cookie data from a string in HTTP header (RFC 822) format.
|
|
|
|
@return: tuple (headers, scheme, host, path)
|
|
@raises: ValueError for incomplete or invalid data
|
|
"""
|
|
fp = StringIO(strheader)
|
|
headers = rfc822.Message(fp, seekable=True)
|
|
if "Host" not in headers:
|
|
raise ValueError("Required header 'Host:' missing")
|
|
host = headers["Host"]
|
|
scheme = headers.get("Scheme", "http")
|
|
path= headers.get("Path", "/")
|
|
return (headers, scheme, host, path)
|