mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-16 22:10:26 +00:00
Remove spaces after names in class method definitions
This also applies to nested functions. It follows the PEP 8 whitespace convention, which pycodestyle reports as E211 (whitespace before "(").
This commit is contained in:
parent
1663e10fe7
commit
a15a2833ca
103 changed files with 645 additions and 645 deletions
|
|
@ -252,7 +252,7 @@ else:
|
|||
class Colorizer:
|
||||
"""Prints colored messages to streams."""
|
||||
|
||||
def __init__ (self, fp):
|
||||
def __init__(self, fp):
|
||||
"""Initialize with given stream (file-like object)."""
|
||||
self.fp = fp
|
||||
if has_colors(fp):
|
||||
|
|
@ -260,18 +260,18 @@ class Colorizer:
|
|||
else:
|
||||
self.write = self._write
|
||||
|
||||
def _write (self, text, color=None):
|
||||
def _write(self, text, color=None):
|
||||
"""Print text as-is."""
|
||||
self.fp.write(text)
|
||||
|
||||
def _write_color (self, text, color=None):
|
||||
def _write_color(self, text, color=None):
|
||||
"""Print text with given color. If color is None, print text as-is."""
|
||||
if color is None:
|
||||
self.fp.write(text)
|
||||
else:
|
||||
write_color(self.fp, text, color)
|
||||
|
||||
def __getattr__ (self, name):
|
||||
def __getattr__(self, name):
|
||||
"""Delegate attribute access to the stored stream object."""
|
||||
return getattr(self.fp, name)
|
||||
|
||||
|
|
@ -279,7 +279,7 @@ class Colorizer:
|
|||
class ColoredStreamHandler(logging.StreamHandler):
|
||||
"""Send colored log messages to streams (file-like objects)."""
|
||||
|
||||
def __init__ (self, strm=None):
|
||||
def __init__(self, strm=None):
|
||||
"""Log to given stream (a file-like object) or to stderr if
|
||||
strm is None.
|
||||
"""
|
||||
|
|
@ -293,12 +293,12 @@ class ColoredStreamHandler(logging.StreamHandler):
|
|||
logging.DEBUG: 'white',
|
||||
}
|
||||
|
||||
def get_color (self, record):
|
||||
def get_color(self, record):
|
||||
"""Get appropriate color according to log level.
|
||||
"""
|
||||
return self.colors.get(record.levelno, 'default')
|
||||
|
||||
def emit (self, record):
|
||||
def emit(self, record):
|
||||
"""Emit a record.
|
||||
|
||||
If a formatter is specified, it is used to format the record.
|
||||
|
|
|
|||
6
linkcheck/cache/robots_txt.py
vendored
6
linkcheck/cache/robots_txt.py
vendored
|
|
@ -33,7 +33,7 @@ class RobotsTxt:
|
|||
format: {cache key (string) -> robots.txt content (RobotFileParser)}
|
||||
"""
|
||||
|
||||
def __init__ (self, useragent):
|
||||
def __init__(self, useragent):
|
||||
"""Initialize per-URL robots.txt cache."""
|
||||
# mapping {URL -> parsed robots.txt}
|
||||
self.cache = LFUCache(size=100)
|
||||
|
|
@ -41,13 +41,13 @@ class RobotsTxt:
|
|||
self.roboturl_locks = {}
|
||||
self.useragent = useragent
|
||||
|
||||
def allows_url (self, url_data):
|
||||
def allows_url(self, url_data):
|
||||
"""Ask robots.txt allowance."""
|
||||
roboturl = url_data.get_robots_txt_url()
|
||||
with self.get_lock(roboturl):
|
||||
return self._allows_url(url_data, roboturl)
|
||||
|
||||
def _allows_url (self, url_data, roboturl):
|
||||
def _allows_url(self, url_data, roboturl):
|
||||
"""Ask robots.txt allowance. Assumes only single thread per robots.txt
|
||||
URL calls this function."""
|
||||
with cache_lock:
|
||||
|
|
|
|||
24
linkcheck/cache/urlqueue.py
vendored
24
linkcheck/cache/urlqueue.py
vendored
|
|
@ -37,7 +37,7 @@ class UrlQueue:
|
|||
"""A queue supporting several consumer tasks. The task_done() idea is
|
||||
from the Python 2.5 implementation of Queue.Queue()."""
|
||||
|
||||
def __init__ (self, max_allowed_urls=None):
|
||||
def __init__(self, max_allowed_urls=None):
|
||||
"""Initialize the queue state and task counters."""
|
||||
# Note: don't put a maximum size on the queue since it would
|
||||
# lead to deadlocks when all worker threads called put().
|
||||
|
|
@ -62,31 +62,31 @@ class UrlQueue:
|
|||
self.max_allowed_urls = max_allowed_urls
|
||||
self.num_puts = 0
|
||||
|
||||
def qsize (self):
|
||||
def qsize(self):
|
||||
"""Return the approximate size of the queue (not reliable!)."""
|
||||
with self.mutex:
|
||||
return len(self.queue)
|
||||
|
||||
def empty (self):
|
||||
def empty(self):
|
||||
"""Return True if the queue is empty, False otherwise.
|
||||
Result is thread-safe, but not reliable since the queue could have
|
||||
been changed before the result is returned!"""
|
||||
with self.mutex:
|
||||
return self._empty()
|
||||
|
||||
def _empty (self):
|
||||
def _empty(self):
|
||||
"""Return True if the queue is empty, False otherwise.
|
||||
Not thread-safe!"""
|
||||
return not self.queue
|
||||
|
||||
def get (self, timeout=None):
|
||||
def get(self, timeout=None):
|
||||
"""Get first not-in-progress url from the queue and
|
||||
return it. If no such url is available return None.
|
||||
"""
|
||||
with self.not_empty:
|
||||
return self._get(timeout)
|
||||
|
||||
def _get (self, timeout):
|
||||
def _get(self, timeout):
|
||||
"""Non thread-safe utility function of self.get() doing the real
|
||||
work."""
|
||||
if timeout is None:
|
||||
|
|
@ -104,7 +104,7 @@ class UrlQueue:
|
|||
self.in_progress += 1
|
||||
return self.queue.popleft()
|
||||
|
||||
def put (self, item):
|
||||
def put(self, item):
|
||||
"""Put an item into the queue.
|
||||
Block if necessary until a free slot is available.
|
||||
"""
|
||||
|
|
@ -112,7 +112,7 @@ class UrlQueue:
|
|||
self._put(item)
|
||||
self.not_empty.notify()
|
||||
|
||||
def _put (self, url_data):
|
||||
def _put(self, url_data):
|
||||
"""Put URL in queue, increase number of unfinished tasks."""
|
||||
if self.shutdown or self.max_allowed_urls == 0:
|
||||
return
|
||||
|
|
@ -154,7 +154,7 @@ class UrlQueue:
|
|||
self.queue.rotate(pos)
|
||||
self.queue.appendleft(item)
|
||||
|
||||
def task_done (self, url_data):
|
||||
def task_done(self, url_data):
|
||||
"""
|
||||
Indicate that a formerly enqueued task is complete.
|
||||
|
||||
|
|
@ -179,7 +179,7 @@ class UrlQueue:
|
|||
raise ValueError('task_done() called too many times')
|
||||
self.all_tasks_done.notifyAll()
|
||||
|
||||
def join (self, timeout=None):
|
||||
def join(self, timeout=None):
|
||||
"""Blocks until all items in the Queue have been gotten and processed.
|
||||
|
||||
The count of unfinished tasks goes up whenever an item is added to the
|
||||
|
|
@ -202,7 +202,7 @@ class UrlQueue:
|
|||
raise Timeout()
|
||||
self.all_tasks_done.wait(remaining)
|
||||
|
||||
def do_shutdown (self):
|
||||
def do_shutdown(self):
|
||||
"""Shutdown the queue by not accepting any more URLs."""
|
||||
with self.mutex:
|
||||
unfinished = self.unfinished_tasks - len(self.queue)
|
||||
|
|
@ -214,7 +214,7 @@ class UrlQueue:
|
|||
self.unfinished_tasks = unfinished
|
||||
self.shutdown = True
|
||||
|
||||
def status (self):
|
||||
def status(self):
|
||||
"""Get tuple (finished tasks, in progress, queue size)."""
|
||||
# no need to acquire self.mutex since the numbers are unreliable anyways.
|
||||
return (self.finished_tasks, self.in_progress, len(self.queue))
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ class DnsUrl(urlbase.UrlBase):
|
|||
Url link with dns scheme.
|
||||
"""
|
||||
|
||||
def can_get_content (self):
|
||||
def can_get_content(self):
|
||||
"""
|
||||
dns: URLs do not have any content
|
||||
|
||||
|
|
|
|||
|
|
@ -93,7 +93,7 @@ class FileUrl(urlbase.UrlBase):
|
|||
Url link with file scheme.
|
||||
"""
|
||||
|
||||
def init (self, base_ref, base_url, parent_url, recursion_level,
|
||||
def init(self, base_ref, base_url, parent_url, recursion_level,
|
||||
aggregate, line, column, page, name, url_encoding, extern):
|
||||
"""Initialize the scheme."""
|
||||
super(FileUrl, self).init(base_ref, base_url, parent_url,
|
||||
|
|
@ -128,7 +128,7 @@ class FileUrl(urlbase.UrlBase):
|
|||
base_url = re.sub("^file://([^/])", r"file:///\1", base_url)
|
||||
self.base_url = base_url
|
||||
|
||||
def build_url (self):
|
||||
def build_url(self):
|
||||
"""
|
||||
Calls super.build_url() and adds a trailing slash to directories.
|
||||
"""
|
||||
|
|
@ -154,7 +154,7 @@ class FileUrl(urlbase.UrlBase):
|
|||
self.urlparts[2] += '/'
|
||||
self.url = urlutil.urlunsplit(self.urlparts)
|
||||
|
||||
def add_size_info (self):
|
||||
def add_size_info(self):
|
||||
"""Get size of file content and modification time from filename path."""
|
||||
if self.is_directory():
|
||||
# Directory size always differs from the customer index.html
|
||||
|
|
@ -164,7 +164,7 @@ class FileUrl(urlbase.UrlBase):
|
|||
self.size = fileutil.get_size(filename)
|
||||
self.modified = datetime.utcfromtimestamp(fileutil.get_mtime(filename))
|
||||
|
||||
def check_connection (self):
|
||||
def check_connection(self):
|
||||
"""
|
||||
Try to open the local file. Under NT systems the case sensitivity
|
||||
is checked.
|
||||
|
|
@ -180,7 +180,7 @@ class FileUrl(urlbase.UrlBase):
|
|||
self.url_connection = urllib.request.urlopen(url)
|
||||
self.check_case_sensitivity()
|
||||
|
||||
def check_case_sensitivity (self):
|
||||
def check_case_sensitivity(self):
|
||||
"""
|
||||
Check if url and windows path name match cases
|
||||
else there might be problems when copying such
|
||||
|
|
@ -197,7 +197,7 @@ class FileUrl(urlbase.UrlBase):
|
|||
{"path": path, "realpath": realpath},
|
||||
tag=WARN_FILE_SYSTEM_PATH)
|
||||
|
||||
def read_content (self):
|
||||
def read_content(self):
|
||||
"""Return file content, or in case of directories a dummy HTML file
|
||||
with links to the files."""
|
||||
if self.is_directory():
|
||||
|
|
@ -208,7 +208,7 @@ class FileUrl(urlbase.UrlBase):
|
|||
data = super(FileUrl, self).read_content()
|
||||
return data
|
||||
|
||||
def get_os_filename (self):
|
||||
def get_os_filename(self):
|
||||
"""
|
||||
Construct os specific file path out of the file:// URL.
|
||||
|
||||
|
|
@ -217,11 +217,11 @@ class FileUrl(urlbase.UrlBase):
|
|||
"""
|
||||
return get_os_filename(self.urlparts[2])
|
||||
|
||||
def get_temp_filename (self):
|
||||
def get_temp_filename(self):
|
||||
"""Get filename for content to parse."""
|
||||
return self.get_os_filename()
|
||||
|
||||
def is_directory (self):
|
||||
def is_directory(self):
|
||||
"""
|
||||
Check if file is a directory.
|
||||
|
||||
|
|
@ -231,7 +231,7 @@ class FileUrl(urlbase.UrlBase):
|
|||
filename = self.get_os_filename()
|
||||
return os.path.isdir(filename) and not os.path.islink(filename)
|
||||
|
||||
def is_parseable (self):
|
||||
def is_parseable(self):
|
||||
"""Check if content is parseable for recursion.
|
||||
|
||||
@return: True if content is parseable
|
||||
|
|
@ -246,7 +246,7 @@ class FileUrl(urlbase.UrlBase):
|
|||
log.debug(LOG_CHECK, "File with content type %r is not parseable.", self.content_type)
|
||||
return False
|
||||
|
||||
def set_content_type (self):
|
||||
def set_content_type(self):
|
||||
"""Return URL content type, or an empty string if content
|
||||
type could not be found."""
|
||||
if self.url:
|
||||
|
|
@ -254,7 +254,7 @@ class FileUrl(urlbase.UrlBase):
|
|||
else:
|
||||
self.content_type = ""
|
||||
|
||||
def get_intern_pattern (self, url=None):
|
||||
def get_intern_pattern(self, url=None):
|
||||
"""Get pattern for intern URL matching.
|
||||
|
||||
@return non-empty regex pattern or None
|
||||
|
|
@ -271,7 +271,7 @@ class FileUrl(urlbase.UrlBase):
|
|||
url = url[:i+1]
|
||||
return re.escape(url)
|
||||
|
||||
def add_url (self, url, line=0, column=0, page=0, name="", base=None):
|
||||
def add_url(self, url, line=0, column=0, page=0, name="", base=None):
|
||||
"""If a local webroot directory is configured, replace absolute URLs
|
||||
with it. After that queue the URL data for checking."""
|
||||
webroot = self.aggregate.config["localwebroot"]
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ class FtpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
Url link with ftp scheme.
|
||||
"""
|
||||
|
||||
def reset (self):
|
||||
def reset(self):
|
||||
"""
|
||||
Initialize FTP url data.
|
||||
"""
|
||||
|
|
@ -41,7 +41,7 @@ class FtpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
self.filename = None
|
||||
self.filename_encoding = 'iso-8859-1'
|
||||
|
||||
def check_connection (self):
|
||||
def check_connection(self):
|
||||
"""
|
||||
In case of proxy, delegate to HttpUrl. Else check in this
|
||||
order: login, changing directory, list the file.
|
||||
|
|
@ -67,7 +67,7 @@ class FtpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
self.files = []
|
||||
return None
|
||||
|
||||
def login (self):
|
||||
def login(self):
|
||||
"""Log into ftp server and check the welcome message."""
|
||||
self.url_connection = ftplib.FTP(timeout=self.aggregate.config["timeout"])
|
||||
if log.is_debug(LOG_CHECK):
|
||||
|
|
@ -93,7 +93,7 @@ class FtpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
raise LinkCheckerError(
|
||||
_("Remote host has closed connection: %(msg)s") % str(msg))
|
||||
|
||||
def negotiate_encoding (self):
|
||||
def negotiate_encoding(self):
|
||||
"""Check if server can handle UTF-8 encoded filenames.
|
||||
See also RFC 2640."""
|
||||
try:
|
||||
|
|
@ -106,7 +106,7 @@ class FtpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
if " UTF-8" in features.splitlines():
|
||||
self.filename_encoding = "utf-8"
|
||||
|
||||
def cwd (self):
|
||||
def cwd(self):
|
||||
"""
|
||||
Change to URL parent directory. Return filename of last path
|
||||
component.
|
||||
|
|
@ -122,7 +122,7 @@ class FtpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
self.url_connection.cwd(d)
|
||||
return filename
|
||||
|
||||
def listfile (self):
|
||||
def listfile(self):
|
||||
"""
|
||||
See if filename is in the current FTP directory.
|
||||
"""
|
||||
|
|
@ -143,11 +143,11 @@ class FtpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
return
|
||||
raise ftplib.error_perm("550 File not found")
|
||||
|
||||
def get_files (self):
|
||||
def get_files(self):
|
||||
"""Get list of filenames in directory. Subdirectories have an
|
||||
ending slash."""
|
||||
files = []
|
||||
def add_entry (line):
|
||||
def add_entry(line):
|
||||
"""Parse list line and add the entry it points to to the file
|
||||
list."""
|
||||
log.debug(LOG_CHECK, "Directory entry %r", line)
|
||||
|
|
@ -162,7 +162,7 @@ class FtpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
self.url_connection.dir(add_entry)
|
||||
return files
|
||||
|
||||
def is_parseable (self):
|
||||
def is_parseable(self):
|
||||
"""See if URL target is parseable for recursion."""
|
||||
if self.is_directory():
|
||||
return True
|
||||
|
|
@ -171,18 +171,18 @@ class FtpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
log.debug(LOG_CHECK, "URL with content type %r is not parseable.", self.content_type)
|
||||
return False
|
||||
|
||||
def is_directory (self):
|
||||
def is_directory(self):
|
||||
"""See if URL target is a directory."""
|
||||
# either the path is empty, or ends with a slash
|
||||
path = self.urlparts[2]
|
||||
return (not path) or path.endswith('/')
|
||||
|
||||
def set_content_type (self):
|
||||
def set_content_type(self):
|
||||
"""Set URL content type, or an empty string if content
|
||||
type could not be found."""
|
||||
self.content_type = mimeutil.guess_mimetype(self.url, read=self.get_content)
|
||||
|
||||
def read_content (self):
|
||||
def read_content(self):
|
||||
"""Return URL target content, or in case of directories a dummy HTML
|
||||
file with links to the files."""
|
||||
if self.is_directory():
|
||||
|
|
@ -194,7 +194,7 @@ class FtpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
# download file in BINARY mode
|
||||
ftpcmd = "RETR %s" % self.filename
|
||||
buf = StringIO()
|
||||
def stor_data (s):
|
||||
def stor_data(s):
|
||||
"""Helper method storing given data"""
|
||||
# limit the download size
|
||||
if (buf.tell() + len(s)) > self.max_size:
|
||||
|
|
@ -205,7 +205,7 @@ class FtpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
buf.close()
|
||||
return data
|
||||
|
||||
def close_connection (self):
|
||||
def close_connection(self):
|
||||
"""Release the open connection from the connection pool."""
|
||||
if self.url_connection is not None:
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ class HttpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
Url link with http scheme.
|
||||
"""
|
||||
|
||||
def reset (self):
|
||||
def reset(self):
|
||||
"""
|
||||
Initialize HTTP specific variables.
|
||||
"""
|
||||
|
|
@ -63,7 +63,7 @@ class HttpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
self.ssl_cipher = None
|
||||
self.ssl_cert = None
|
||||
|
||||
def allows_robots (self, url):
|
||||
def allows_robots(self, url):
|
||||
"""
|
||||
Fetch and parse the robots.txt of given url. Checks if LinkChecker
|
||||
can get the requested resource content.
|
||||
|
|
@ -75,7 +75,7 @@ class HttpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
"""
|
||||
return not self.aggregate.config['robotstxt'] or self.aggregate.robots_txt.allows_url(self)
|
||||
|
||||
def content_allows_robots (self):
|
||||
def content_allows_robots(self):
|
||||
"""
|
||||
Return False if the content of this URL forbids robots to
|
||||
search for recursive links.
|
||||
|
|
@ -86,7 +86,7 @@ class HttpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
soup = self.get_soup()
|
||||
return not soup.find("meta", attrs={"name": "robots", "content": nofollow_re})
|
||||
|
||||
def add_size_info (self):
|
||||
def add_size_info(self):
|
||||
"""Get size of URL content from HTTP header."""
|
||||
if self.headers and "Content-Length" in self.headers and \
|
||||
"Transfer-Encoding" not in self.headers:
|
||||
|
|
@ -99,7 +99,7 @@ class HttpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
else:
|
||||
self.size = -1
|
||||
|
||||
def check_connection (self):
|
||||
def check_connection(self):
|
||||
"""
|
||||
Check a URL with HTTP protocol.
|
||||
Here is an excerpt from RFC 1945 with common response codes:
|
||||
|
|
@ -205,7 +205,7 @@ class HttpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
else:
|
||||
self.ssl_cert = None
|
||||
|
||||
def construct_auth (self):
|
||||
def construct_auth(self):
|
||||
"""Construct HTTP Basic authentication credentials if there
|
||||
is user/password information available. Does not overwrite if
|
||||
credentials have already been constructed."""
|
||||
|
|
@ -215,7 +215,7 @@ class HttpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
if _user is not None and _password is not None:
|
||||
self.auth = (_user, _password)
|
||||
|
||||
def set_content_type (self):
|
||||
def set_content_type(self):
|
||||
"""Return content MIME type or empty string."""
|
||||
self.content_type = httputil.get_content_type(self.headers)
|
||||
|
||||
|
|
@ -270,7 +270,7 @@ class HttpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
# run connection plugins for old connection
|
||||
self.aggregate.plugin_manager.run_connection_plugins(self)
|
||||
|
||||
def getheader (self, name, default=None):
|
||||
def getheader(self, name, default=None):
|
||||
"""Get decoded header value.
|
||||
|
||||
@return: decoded header value or default of not found
|
||||
|
|
@ -281,7 +281,7 @@ class HttpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
return default
|
||||
return unicode_safe(value, encoding=HEADER_ENCODING)
|
||||
|
||||
def check_response (self):
|
||||
def check_response(self):
|
||||
"""Check final result and log it."""
|
||||
if self.url_connection.status_code >= 400:
|
||||
self.set_result("%d %s" % (self.url_connection.status_code, self.url_connection.reason),
|
||||
|
|
@ -333,7 +333,7 @@ class HttpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
name = "Content-Location: header"
|
||||
self.add_url(url, name=name)
|
||||
|
||||
def is_parseable (self):
|
||||
def is_parseable(self):
|
||||
"""
|
||||
Check if content is parseable for recursion.
|
||||
|
||||
|
|
@ -353,7 +353,7 @@ class HttpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
return False
|
||||
return True
|
||||
|
||||
def get_robots_txt_url (self):
|
||||
def get_robots_txt_url(self):
|
||||
"""
|
||||
Get the according robots.txt URL for this URL.
|
||||
|
||||
|
|
|
|||
|
|
@ -22,6 +22,6 @@ from . import unknownurl
|
|||
class IgnoreUrl(unknownurl.UnknownUrl):
|
||||
"""Always ignored URL."""
|
||||
|
||||
def is_ignored (self):
|
||||
def is_ignored(self):
|
||||
"""Return True if this URL scheme is ignored."""
|
||||
return True
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ def get_intern_pattern(url):
|
|||
class InternPatternUrl(urlbase.UrlBase):
|
||||
"""Class supporting an intern URL pattern."""
|
||||
|
||||
def get_intern_pattern (self, url=None):
|
||||
def get_intern_pattern(self, url=None):
|
||||
"""
|
||||
Get pattern for intern URL matching.
|
||||
|
||||
|
|
|
|||
|
|
@ -67,7 +67,7 @@ class MailtoUrl(urlbase.UrlBase):
|
|||
Url link with mailto scheme.
|
||||
"""
|
||||
|
||||
def build_url (self):
|
||||
def build_url(self):
|
||||
"""Call super.build_url(), extract list of mail addresses from URL,
|
||||
and check their syntax.
|
||||
"""
|
||||
|
|
@ -84,7 +84,7 @@ class MailtoUrl(urlbase.UrlBase):
|
|||
self.add_warning(_("No mail addresses or email subject found in `%(url)s'.") % \
|
||||
{"url": self.url})
|
||||
|
||||
def parse_addresses (self):
|
||||
def parse_addresses(self):
|
||||
"""Parse all mail addresses out of the URL target. Also parses
|
||||
optional CGI headers like "?to=foo@example.org".
|
||||
Stores parsed addresses in the self.addresses set.
|
||||
|
|
@ -127,7 +127,7 @@ class MailtoUrl(urlbase.UrlBase):
|
|||
self.addresses.update(getaddresses(url))
|
||||
log.debug(LOG_CHECK, "addresses: %s", self.addresses)
|
||||
|
||||
def check_email_syntax (self, mail):
|
||||
def check_email_syntax(self, mail):
|
||||
"""Check email syntax. The relevant RFCs:
|
||||
- How to check names (memo):
|
||||
http://tools.ietf.org/html/rfc3696
|
||||
|
|
@ -220,7 +220,7 @@ class MailtoUrl(urlbase.UrlBase):
|
|||
{"addr": mail}, valid=False, overwrite=False)
|
||||
return
|
||||
|
||||
def check_connection (self):
|
||||
def check_connection(self):
|
||||
"""
|
||||
Verify a list of email addresses. If one address fails,
|
||||
the whole list will fail.
|
||||
|
|
@ -235,7 +235,7 @@ class MailtoUrl(urlbase.UrlBase):
|
|||
if not self.valid:
|
||||
break
|
||||
|
||||
def check_smtp_domain (self, mail):
|
||||
def check_smtp_domain(self, mail):
|
||||
"""
|
||||
Check a single mail address.
|
||||
"""
|
||||
|
|
@ -292,7 +292,7 @@ class MailtoUrl(urlbase.UrlBase):
|
|||
emails = ",".join(sorted(self.addresses))
|
||||
self.cache_url = "%s:%s" % (self.scheme, emails)
|
||||
|
||||
def can_get_content (self):
|
||||
def can_get_content(self):
|
||||
"""
|
||||
mailto: URLs do not have any content
|
||||
|
||||
|
|
|
|||
|
|
@ -33,7 +33,7 @@ class NntpUrl(urlbase.UrlBase):
|
|||
Url link with NNTP scheme.
|
||||
"""
|
||||
|
||||
def check_connection (self):
|
||||
def check_connection(self):
|
||||
"""
|
||||
Connect to NNTP server and try to request the URL article
|
||||
resource (if specified).
|
||||
|
|
@ -64,7 +64,7 @@ class NntpUrl(urlbase.UrlBase):
|
|||
self.add_warning(_("No newsgroup specified in NNTP URL."),
|
||||
tag=WARN_NNTP_NO_NEWSGROUP)
|
||||
|
||||
def _connect_nntp (self, nntpserver):
|
||||
def _connect_nntp(self, nntpserver):
|
||||
"""
|
||||
This is done only once per checking task. Also, the newly
|
||||
introduced error codes 504 and 505 (both inclining "Too busy, retry
|
||||
|
|
@ -91,11 +91,11 @@ class NntpUrl(urlbase.UrlBase):
|
|||
self.add_info(nntp.getwelcome())
|
||||
return nntp
|
||||
|
||||
def wait (self):
|
||||
def wait(self):
|
||||
"""Wait some time before trying to connect again."""
|
||||
time.sleep(random.randrange(10, 30))
|
||||
|
||||
def can_get_content (self):
|
||||
def can_get_content(self):
|
||||
"""
|
||||
NNTP urls have no content.
|
||||
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ from .. import LinkCheckerError, log, LOG_CHECK, url as urlutil, httputil
|
|||
class ProxySupport:
|
||||
"""Get support for proxying and for URLs with user:pass@host setting."""
|
||||
|
||||
def set_proxy (self, proxy):
|
||||
def set_proxy(self, proxy):
|
||||
"""Parse given proxy information and store parsed values.
|
||||
Note that only http:// proxies are supported, both for ftp://
|
||||
and http:// URLs.
|
||||
|
|
@ -60,7 +60,7 @@ class ProxySupport:
|
|||
auth = "%s:%s" % (username, password)
|
||||
self.proxyauth = "Basic "+httputil.encode_base64(auth)
|
||||
|
||||
def ignore_proxy_host (self):
|
||||
def ignore_proxy_host(self):
|
||||
"""Check if self.host is in the $no_proxy ignore list."""
|
||||
if urllib.request.proxy_bypass(self.host):
|
||||
return True
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ class TelnetUrl(urlbase.UrlBase):
|
|||
Url link with telnet scheme.
|
||||
"""
|
||||
|
||||
def build_url (self):
|
||||
def build_url(self):
|
||||
"""
|
||||
Call super.build_url(), set default telnet port and initialize
|
||||
the login credentials.
|
||||
|
|
@ -44,7 +44,7 @@ class TelnetUrl(urlbase.UrlBase):
|
|||
# set user/pass
|
||||
self.user, self.password = self.get_user_password()
|
||||
|
||||
def local_check (self):
|
||||
def local_check(self):
|
||||
"""
|
||||
Warn about empty host names. Else call super.local_check().
|
||||
"""
|
||||
|
|
@ -53,7 +53,7 @@ class TelnetUrl(urlbase.UrlBase):
|
|||
return
|
||||
super(TelnetUrl, self).local_check()
|
||||
|
||||
def check_connection (self):
|
||||
def check_connection(self):
|
||||
"""
|
||||
Open a telnet connection and try to login. Expected login
|
||||
label is "login: ", expected password label is "Password: ".
|
||||
|
|
@ -71,7 +71,7 @@ class TelnetUrl(urlbase.UrlBase):
|
|||
# XXX how to tell if we are logged in??
|
||||
self.url_connection.write(b"exit\n")
|
||||
|
||||
def can_get_content (self):
|
||||
def can_get_content(self):
|
||||
"""
|
||||
Telnet URLs have no content.
|
||||
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ from . import urlbase
|
|||
class UnknownUrl(urlbase.UrlBase):
|
||||
"""Handle unknown or just plain broken URLs."""
|
||||
|
||||
def build_url (self):
|
||||
def build_url(self):
|
||||
"""Only logs that this URL is unknown."""
|
||||
super(UnknownUrl, self).build_url()
|
||||
if self.is_ignored():
|
||||
|
|
@ -35,11 +35,11 @@ class UnknownUrl(urlbase.UrlBase):
|
|||
self.set_result(_("URL is unrecognized or has invalid syntax"),
|
||||
valid=False)
|
||||
|
||||
def is_ignored (self):
|
||||
def is_ignored(self):
|
||||
"""Return True if this URL scheme is ignored."""
|
||||
return is_unknown_scheme(self.scheme)
|
||||
|
||||
def can_get_content (self):
|
||||
def can_get_content(self):
|
||||
"""Unknown URLs have no content.
|
||||
|
||||
@return: False
|
||||
|
|
|
|||
|
|
@ -95,7 +95,7 @@ class UrlBase:
|
|||
# Read in 16kb chunks
|
||||
ReadChunkBytes = 1024*16
|
||||
|
||||
def __init__ (self, base_url, recursion_level, aggregate,
|
||||
def __init__(self, base_url, recursion_level, aggregate,
|
||||
parent_url=None, base_ref=None, line=-1, column=-1, page=-1,
|
||||
name="", url_encoding=None, extern=None):
|
||||
"""
|
||||
|
|
@ -126,7 +126,7 @@ class UrlBase:
|
|||
if not self.has_result:
|
||||
self.set_result(_("filtered"))
|
||||
|
||||
def init (self, base_ref, base_url, parent_url, recursion_level,
|
||||
def init(self, base_ref, base_url, parent_url, recursion_level,
|
||||
aggregate, line, column, page, name, url_encoding, extern):
|
||||
"""
|
||||
Initialize internal data.
|
||||
|
|
@ -162,7 +162,7 @@ class UrlBase:
|
|||
self.add_warning(_("Leading or trailing whitespace in URL `%(url)s'.") %
|
||||
{"url": base_url}, tag=WARN_URL_WHITESPACE)
|
||||
|
||||
def reset (self):
|
||||
def reset(self):
|
||||
"""
|
||||
Reset all variables to default values.
|
||||
"""
|
||||
|
|
@ -215,7 +215,7 @@ class UrlBase:
|
|||
# URLs seen through redirections
|
||||
self.aliases = []
|
||||
|
||||
def set_result (self, msg, valid=True, overwrite=False):
|
||||
def set_result(self, msg, valid=True, overwrite=False):
|
||||
"""
|
||||
Set result string and validity.
|
||||
"""
|
||||
|
|
@ -233,7 +233,7 @@ class UrlBase:
|
|||
# free content data
|
||||
self.data = None
|
||||
|
||||
def get_title (self):
|
||||
def get_title(self):
|
||||
"""Return title of page the URL refers to.
|
||||
This is per default the filename or the URL."""
|
||||
if self.title is None:
|
||||
|
|
@ -249,17 +249,17 @@ class UrlBase:
|
|||
self.title = title
|
||||
return self.title
|
||||
|
||||
def is_parseable (self):
|
||||
def is_parseable(self):
|
||||
"""
|
||||
Return True iff content of this url is parseable.
|
||||
"""
|
||||
return False
|
||||
|
||||
def is_html (self):
|
||||
def is_html(self):
|
||||
"""Return True iff content of this url is HTML formatted."""
|
||||
return self._is_ctype("html")
|
||||
|
||||
def is_css (self):
|
||||
def is_css(self):
|
||||
"""Return True iff content of this url is CSS stylesheet."""
|
||||
return self._is_ctype("css")
|
||||
|
||||
|
|
@ -270,11 +270,11 @@ class UrlBase:
|
|||
mime = self.content_type
|
||||
return self.ContentMimetypes.get(mime) == ctype
|
||||
|
||||
def is_http (self):
|
||||
def is_http(self):
|
||||
"""Return True for http:// or https:// URLs."""
|
||||
return self.scheme in ("http", "https")
|
||||
|
||||
def is_file (self):
|
||||
def is_file(self):
|
||||
"""Return True for file:// URLs."""
|
||||
return self.scheme == "file"
|
||||
|
||||
|
|
@ -286,7 +286,7 @@ class UrlBase:
|
|||
"""Return True for local (ie. file://) URLs."""
|
||||
return self.is_file()
|
||||
|
||||
def add_warning (self, s, tag=None):
|
||||
def add_warning(self, s, tag=None):
|
||||
"""
|
||||
Add a warning string.
|
||||
"""
|
||||
|
|
@ -295,14 +295,14 @@ class UrlBase:
|
|||
tag not in self.aggregate.config["ignorewarnings"]:
|
||||
self.warnings.append(item)
|
||||
|
||||
def add_info (self, s):
|
||||
def add_info(self, s):
|
||||
"""
|
||||
Add an info string.
|
||||
"""
|
||||
if s not in self.info:
|
||||
self.info.append(s)
|
||||
|
||||
def set_cache_url (self):
|
||||
def set_cache_url(self):
|
||||
"""Set the URL to be used for caching."""
|
||||
# remove anchor from cached target url since we assume
|
||||
# URLs with different anchors to have the same content
|
||||
|
|
@ -310,7 +310,7 @@ class UrlBase:
|
|||
if self.cache_url is not None:
|
||||
assert isinstance(self.cache_url, str_text), repr(self.cache_url)
|
||||
|
||||
def check_syntax (self):
|
||||
def check_syntax(self):
|
||||
"""
|
||||
Called before self.check(), this function inspects the
|
||||
url syntax. Success enables further checking, failure
|
||||
|
|
@ -343,7 +343,7 @@ class UrlBase:
|
|||
args = dict(len=len(self.url), max=URL_MAX_LENGTH)
|
||||
self.add_warning(_("URL length %(len)d is longer than %(max)d.") % args, tag=WARN_URL_TOO_LONG)
|
||||
|
||||
def build_url (self):
|
||||
def build_url(self):
|
||||
"""
|
||||
Construct self.url and self.urlparts out of the given base
|
||||
url information self.base_url, self.parent_url and self.base_ref.
|
||||
|
|
@ -378,7 +378,7 @@ class UrlBase:
|
|||
# and unsplit again
|
||||
self.url = urlutil.urlunsplit(self.urlparts)
|
||||
|
||||
def build_url_parts (self):
|
||||
def build_url_parts(self):
|
||||
"""Set userinfo, host, port and anchor from self.urlparts.
|
||||
Also checks for obfuscated IP addresses.
|
||||
"""
|
||||
|
|
@ -409,7 +409,7 @@ class UrlBase:
|
|||
if self.anchor is not None:
|
||||
assert isinstance(self.anchor, str_text), repr(self.anchor)
|
||||
|
||||
def check_obfuscated_ip (self):
|
||||
def check_obfuscated_ip(self):
|
||||
"""Warn if host of this URL is obfuscated IP address."""
|
||||
# check if self.host can be an IP address
|
||||
# check for obfuscated IP address
|
||||
|
|
@ -422,7 +422,7 @@ class UrlBase:
|
|||
{"url": self.base_url, "ip": ips[0]},
|
||||
tag=WARN_URL_OBFUSCATED_IP)
|
||||
|
||||
def check (self):
|
||||
def check(self):
|
||||
"""Main check function for checking this URL."""
|
||||
if self.aggregate.config["trace"]:
|
||||
trace.trace_on()
|
||||
|
|
@ -437,7 +437,7 @@ class UrlBase:
|
|||
else:
|
||||
raise
|
||||
|
||||
def local_check (self):
|
||||
def local_check(self):
|
||||
"""Local check function can be overridden in subclasses."""
|
||||
log.debug(LOG_CHECK, "Checking %s", str_text(self))
|
||||
# strict extern URLs should not be checked
|
||||
|
|
@ -476,7 +476,7 @@ class UrlBase:
|
|||
{"msg": str_text(value)}, tag=WARN_URL_ERROR_GETTING_CONTENT)
|
||||
return False
|
||||
|
||||
def close_connection (self):
|
||||
def close_connection(self):
|
||||
"""
|
||||
Close an opened url connection.
|
||||
"""
|
||||
|
|
@ -490,7 +490,7 @@ class UrlBase:
|
|||
pass
|
||||
self.url_connection = None
|
||||
|
||||
def handle_exception (self):
|
||||
def handle_exception(self):
|
||||
"""
|
||||
An exception occurred. Log it and set the cache flag.
|
||||
"""
|
||||
|
|
@ -510,14 +510,14 @@ class UrlBase:
|
|||
# limit length to 240
|
||||
return strformat.limit(errmsg, length=240)
|
||||
|
||||
def check_connection (self):
|
||||
def check_connection(self):
|
||||
"""
|
||||
The basic connection check uses urlopen to initialize
|
||||
a connection object.
|
||||
"""
|
||||
self.url_connection = urlopen(self.url)
|
||||
|
||||
def add_size_info (self):
|
||||
def add_size_info(self):
|
||||
"""Set size of URL content (if any)..
|
||||
Should be overridden in subclasses."""
|
||||
maxbytes = self.aggregate.config["maxfilesizedownload"]
|
||||
|
|
@ -539,7 +539,7 @@ class UrlBase:
|
|||
return False
|
||||
return True
|
||||
|
||||
def allows_recursion (self):
|
||||
def allows_recursion(self):
|
||||
"""
|
||||
Return True iff we can recurse into the url's content.
|
||||
"""
|
||||
|
|
@ -568,7 +568,7 @@ class UrlBase:
|
|||
"""Returns True: only check robots.txt on HTTP links."""
|
||||
return True
|
||||
|
||||
def set_extern (self, url):
|
||||
def set_extern(self, url):
|
||||
"""
|
||||
Match URL against extern and intern link patterns. If no pattern
|
||||
matches the URL is extern. Sets self.extern to a tuple (bool,
|
||||
|
|
@ -600,12 +600,12 @@ class UrlBase:
|
|||
else:
|
||||
self.extern = (1, 1)
|
||||
|
||||
def set_content_type (self):
|
||||
def set_content_type(self):
|
||||
"""Set content MIME type.
|
||||
Should be overridden in subclasses."""
|
||||
pass
|
||||
|
||||
def can_get_content (self):
|
||||
def can_get_content(self):
|
||||
"""Indicate wether url get_content() can be called."""
|
||||
return self.size <= self.aggregate.config["maxfilesizedownload"]
|
||||
|
||||
|
|
@ -632,7 +632,7 @@ class UrlBase:
|
|||
self.data = self.download_content()
|
||||
return self.data
|
||||
|
||||
def get_content (self):
|
||||
def get_content(self):
|
||||
if self.text is None:
|
||||
self.get_raw_content()
|
||||
self.soup = htmlsoup.make_soup(self.data)
|
||||
|
|
@ -657,7 +657,7 @@ class UrlBase:
|
|||
"""
|
||||
return self.url_connection.read(self.ReadChunkBytes)
|
||||
|
||||
def get_user_password (self):
|
||||
def get_user_password(self):
|
||||
"""Get tuple (user, password) from configured authentication.
|
||||
Both user and password can be None.
|
||||
"""
|
||||
|
|
@ -666,7 +666,7 @@ class UrlBase:
|
|||
return urllib.parse.splitpasswd(self.userinfo)
|
||||
return self.aggregate.config.get_user_password(self.url)
|
||||
|
||||
def add_url (self, url, line=0, column=0, page=0, name="", base=None):
|
||||
def add_url(self, url, line=0, column=0, page=0, name="", base=None):
|
||||
"""Add new URL to queue."""
|
||||
if base:
|
||||
base_ref = urlutil.url_norm(base, encoding=self.encoding)[0]
|
||||
|
|
@ -677,7 +677,7 @@ class UrlBase:
|
|||
page=page, name=name, parent_content_type=self.content_type, url_encoding=self.encoding)
|
||||
self.aggregate.urlqueue.put(url_data)
|
||||
|
||||
def serialized (self, sep=os.linesep):
|
||||
def serialized(self, sep=os.linesep):
|
||||
"""
|
||||
Return serialized url check data as unicode string.
|
||||
"""
|
||||
|
|
@ -696,7 +696,7 @@ class UrlBase:
|
|||
"cache_url=%s" % self.cache_url,
|
||||
])
|
||||
|
||||
def get_intern_pattern (self, url=None):
|
||||
def get_intern_pattern(self, url=None):
|
||||
"""Get pattern for intern URL matching.
|
||||
|
||||
@param url: the URL to set intern pattern for, else self.url
|
||||
|
|
@ -737,7 +737,7 @@ class UrlBase:
|
|||
s = str_text(self)
|
||||
return self.aggregate.config['logger'].encode(s)
|
||||
|
||||
def __repr__ (self):
|
||||
def __repr__(self):
|
||||
"""
|
||||
Get URL info.
|
||||
|
||||
|
|
@ -746,7 +746,7 @@ class UrlBase:
|
|||
"""
|
||||
return "<%s>" % self.serialized(sep=", ")
|
||||
|
||||
def to_wire_dict (self):
|
||||
def to_wire_dict(self):
|
||||
"""Return a simplified transport object for logging and caching.
|
||||
|
||||
The transport object must contain these attributes:
|
||||
|
|
@ -813,7 +813,7 @@ class UrlBase:
|
|||
modified=self.modified,
|
||||
)
|
||||
|
||||
def to_wire (self):
|
||||
def to_wire(self):
|
||||
"""Return compact UrlData object with information from to_wire_dict().
|
||||
"""
|
||||
return CompactUrlData(self.to_wire_dict())
|
||||
|
|
|
|||
|
|
@ -150,7 +150,7 @@ class Configuration(dict):
|
|||
the command line as well as from configuration files.
|
||||
"""
|
||||
|
||||
def __init__ (self):
|
||||
def __init__(self):
|
||||
"""
|
||||
Initialize the default options.
|
||||
"""
|
||||
|
|
@ -210,18 +210,18 @@ class Configuration(dict):
|
|||
"""Set the status logger."""
|
||||
self.status_logger = status_logger
|
||||
|
||||
def logger_new (self, loggername, **kwargs):
|
||||
def logger_new(self, loggername, **kwargs):
|
||||
"""Instantiate new logger and return it."""
|
||||
args = self[loggername]
|
||||
args.update(kwargs)
|
||||
return self.loggers[loggername](**args)
|
||||
|
||||
def logger_add (self, loggerclass):
|
||||
def logger_add(self, loggerclass):
|
||||
"""Add a new logger type to the known loggers."""
|
||||
self.loggers[loggerclass.LoggerName] = loggerclass
|
||||
self[loggerclass.LoggerName] = {}
|
||||
|
||||
def read (self, files=None):
|
||||
def read(self, files=None):
|
||||
"""
|
||||
Read settings from given config files.
|
||||
|
||||
|
|
@ -247,7 +247,7 @@ class Configuration(dict):
|
|||
log.debug(LOG_CHECK, "reading configuration from %s", filtered_cfiles)
|
||||
confparse.LCConfigParser(self).read(filtered_cfiles)
|
||||
|
||||
def add_auth (self, user=None, password=None, pattern=None):
|
||||
def add_auth(self, user=None, password=None, pattern=None):
|
||||
"""Add given authentication data."""
|
||||
if not user or not pattern:
|
||||
log.warn(LOG_CHECK,
|
||||
|
|
@ -260,7 +260,7 @@ class Configuration(dict):
|
|||
)
|
||||
self["authentication"].append(entry)
|
||||
|
||||
def get_user_password (self, url):
|
||||
def get_user_password(self, url):
|
||||
"""Get tuple (user, password) from configured authentication
|
||||
that matches the given URL.
|
||||
Both user and password can be None if not specified, or no
|
||||
|
|
@ -275,7 +275,7 @@ class Configuration(dict):
|
|||
"""Get dict with limit per connection type."""
|
||||
return {key: self['maxconnections%s' % key] for key in ('http', 'https', 'ftp')}
|
||||
|
||||
def sanitize (self):
|
||||
def sanitize(self):
|
||||
"Make sure the configuration is consistent."
|
||||
if self['logger'] is None:
|
||||
self.sanitize_logger()
|
||||
|
|
@ -287,14 +287,14 @@ class Configuration(dict):
|
|||
# set default socket timeout
|
||||
socket.setdefaulttimeout(self['timeout'])
|
||||
|
||||
def sanitize_logger (self):
|
||||
def sanitize_logger(self):
|
||||
"""Make logger configuration consistent."""
|
||||
if not self['output']:
|
||||
log.warn(LOG_CHECK, _("activating text logger output."))
|
||||
self['output'] = 'text'
|
||||
self['logger'] = self.logger_new(self['output'])
|
||||
|
||||
def sanitize_loginurl (self):
|
||||
def sanitize_loginurl(self):
|
||||
"""Make login configuration consistent."""
|
||||
url = self["loginurl"]
|
||||
disable = False
|
||||
|
|
@ -322,7 +322,7 @@ class Configuration(dict):
|
|||
_("disabling login URL %(url)s.") % {"url": url})
|
||||
self["loginurl"] = None
|
||||
|
||||
def sanitize_proxies (self):
|
||||
def sanitize_proxies(self):
|
||||
"""Try to read additional proxy settings which urllib does not
|
||||
support."""
|
||||
if os.name != 'posix':
|
||||
|
|
|
|||
|
|
@ -35,12 +35,12 @@ class LCConfigParser(RawConfigParser):
|
|||
Parse a LinkChecker configuration file.
|
||||
"""
|
||||
|
||||
def __init__ (self, config):
|
||||
def __init__(self, config):
|
||||
"""Initialize configuration."""
|
||||
super(LCConfigParser, self).__init__()
|
||||
self.config = config
|
||||
|
||||
def read (self, files):
|
||||
def read(self, files):
|
||||
"""Read settings from given config files.
|
||||
|
||||
@raises: LinkCheckerError on syntax errors in the config file(s)
|
||||
|
|
@ -61,7 +61,7 @@ class LCConfigParser(RawConfigParser):
|
|||
raise LinkCheckerError(
|
||||
_("Error parsing configuration: %s") % str(msg))
|
||||
|
||||
def read_string_option (self, section, option, allowempty=False):
|
||||
def read_string_option(self, section, option, allowempty=False):
|
||||
"""Read a string option."""
|
||||
if self.has_option(section, option):
|
||||
value = self.get(section, option)
|
||||
|
|
@ -74,7 +74,7 @@ class LCConfigParser(RawConfigParser):
|
|||
if self.has_option(section, option):
|
||||
self.config[option] = self.getboolean(section, option)
|
||||
|
||||
def read_int_option (self, section, option, key=None, min=None, max=None):
|
||||
def read_int_option(self, section, option, key=None, min=None, max=None):
|
||||
"""Read an integer option."""
|
||||
if self.has_option(section, option):
|
||||
num = self.getint(section, option)
|
||||
|
|
@ -88,7 +88,7 @@ class LCConfigParser(RawConfigParser):
|
|||
key = option
|
||||
self.config[key] = num
|
||||
|
||||
def read_output_config (self):
|
||||
def read_output_config(self):
|
||||
"""Read configuration options in section "output"."""
|
||||
section = "output"
|
||||
from ..logger import LoggerClasses
|
||||
|
|
@ -130,7 +130,7 @@ class LCConfigParser(RawConfigParser):
|
|||
output = self.config.logger_new(val, fileoutput=1)
|
||||
self.config['fileoutput'].append(output)
|
||||
|
||||
def read_checking_config (self):
|
||||
def read_checking_config(self):
|
||||
"""Read configuration options in section "checking"."""
|
||||
section = "checking"
|
||||
self.read_int_option(section, "threads", min=-1)
|
||||
|
|
@ -157,7 +157,7 @@ class LCConfigParser(RawConfigParser):
|
|||
self.read_string_option(section, "sslverify")
|
||||
self.read_int_option(section, "maxrunseconds", min=0)
|
||||
|
||||
def read_authentication_config (self):
|
||||
def read_authentication_config(self):
|
||||
"""Read configuration options in section "authentication"."""
|
||||
section = "authentication"
|
||||
password_fields = []
|
||||
|
|
@ -207,7 +207,7 @@ class LCConfigParser(RawConfigParser):
|
|||
elif os.name == 'nt':
|
||||
log.warn(LOG_CHECK, _("See http://support.microsoft.com/kb/308419 for more info on setting file permissions."))
|
||||
|
||||
def read_filtering_config (self):
|
||||
def read_filtering_config(self):
|
||||
"""
|
||||
Read configuration options in section "filtering".
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -20,14 +20,14 @@ Special container classes.
|
|||
class LFUCache(dict):
|
||||
"""Limited cache which purges least frequently used items."""
|
||||
|
||||
def __init__ (self, size=1000):
|
||||
def __init__(self, size=1000):
|
||||
"""Initialize internal LFU cache."""
|
||||
super(LFUCache, self).__init__()
|
||||
if size < 1:
|
||||
raise ValueError("invalid cache size %d" % size)
|
||||
self.size = size
|
||||
|
||||
def __setitem__ (self, key, val):
|
||||
def __setitem__(self, key, val):
|
||||
"""Store given key/value."""
|
||||
if key in self:
|
||||
# store value, do not increase number of uses
|
||||
|
|
@ -38,7 +38,7 @@ class LFUCache(dict):
|
|||
if len(self) > self.size:
|
||||
self.shrink()
|
||||
|
||||
def shrink (self):
|
||||
def shrink(self):
|
||||
"""Shrink ca. 5% of entries."""
|
||||
trim = int(0.05*len(self))
|
||||
if trim:
|
||||
|
|
@ -49,24 +49,24 @@ class LFUCache(dict):
|
|||
for item in values[0:trim]:
|
||||
del self[item[0]]
|
||||
|
||||
def __getitem__ (self, key):
|
||||
def __getitem__(self, key):
|
||||
"""Update key usage and return value."""
|
||||
entry = super(LFUCache, self).__getitem__(key)
|
||||
entry[0] += 1
|
||||
return entry[1]
|
||||
|
||||
def uses (self, key):
|
||||
def uses(self, key):
|
||||
"""Get number of uses for given key (without increasing the number of
|
||||
uses)"""
|
||||
return super(LFUCache, self).__getitem__(key)[0]
|
||||
|
||||
def get (self, key, def_val=None):
|
||||
def get(self, key, def_val=None):
|
||||
"""Update key usage if found and return value, else return default."""
|
||||
if key in self:
|
||||
return self[key]
|
||||
return def_val
|
||||
|
||||
def setdefault (self, key, def_val=None):
|
||||
def setdefault(self, key, def_val=None):
|
||||
"""Update key usage if found and return value, else set and return
|
||||
default."""
|
||||
if key in self:
|
||||
|
|
@ -74,30 +74,30 @@ class LFUCache(dict):
|
|||
self[key] = def_val
|
||||
return def_val
|
||||
|
||||
def items (self):
|
||||
def items(self):
|
||||
"""Return list of items, not updating usage count."""
|
||||
return [(key, value[1]) for key, value in super(LFUCache, self).items()]
|
||||
|
||||
def iteritems (self):
|
||||
def iteritems(self):
|
||||
"""Return iterator of items, not updating usage count."""
|
||||
for key, value in super(LFUCache, self).items():
|
||||
yield (key, value[1])
|
||||
|
||||
def values (self):
|
||||
def values(self):
|
||||
"""Return list of values, not updating usage count."""
|
||||
return [value[1] for value in super(LFUCache, self).values()]
|
||||
|
||||
def itervalues (self):
|
||||
def itervalues(self):
|
||||
"""Return iterator of values, not updating usage count."""
|
||||
for value in super(LFUCache, self).values():
|
||||
yield value[1]
|
||||
|
||||
def popitem (self):
|
||||
def popitem(self):
|
||||
"""Remove and return an item."""
|
||||
key, value = super(LFUCache, self).popitem()
|
||||
return (key, value[1])
|
||||
|
||||
def pop (self):
|
||||
def pop(self):
|
||||
"""Remove and return a value."""
|
||||
value = super(LFUCache, self).pop()
|
||||
return value[1]
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ def update_func_meta(fake_func, real_func):
|
|||
def deprecated(func):
|
||||
"""A decorator which can be used to mark functions as deprecated.
|
||||
It emits a warning when the function is called."""
|
||||
def newfunc (*args, **kwargs):
|
||||
def newfunc(*args, **kwargs):
|
||||
"""Print deprecated warning and execute original function."""
|
||||
warnings.warn("Call to deprecated function %s." % func.__name__,
|
||||
category=DeprecationWarning)
|
||||
|
|
@ -74,7 +74,7 @@ def signal_handler(signal_number):
|
|||
no handler is set.
|
||||
"""
|
||||
# create the 'real' decorator which takes only a function as an argument
|
||||
def newfunc (function):
|
||||
def newfunc(function):
|
||||
"""Register function as signal handler."""
|
||||
# note: actually the kill(2) function uses the signal number of 0
|
||||
# for a special case, but for signal(2) only positive integers
|
||||
|
|
@ -88,7 +88,7 @@ def signal_handler(signal_number):
|
|||
|
||||
def synchronize(lock, func, log_duration_secs=0):
|
||||
"""Return synchronized function acquiring the given lock."""
|
||||
def newfunc (*args, **kwargs):
|
||||
def newfunc(*args, **kwargs):
|
||||
"""Execute function synchronized."""
|
||||
t = time.time()
|
||||
with lock:
|
||||
|
|
@ -106,7 +106,7 @@ def synchronized(lock):
|
|||
|
||||
def notimplemented(func):
|
||||
"""Raises a NotImplementedError if the function is called."""
|
||||
def newfunc (*args, **kwargs):
|
||||
def newfunc(*args, **kwargs):
|
||||
"""Raise NotImplementedError"""
|
||||
co = func.func_code
|
||||
attrs = (co.co_name, co.co_filename, co.co_firstlineno)
|
||||
|
|
@ -117,7 +117,7 @@ def notimplemented(func):
|
|||
def timeit(func, log, limit):
|
||||
"""Print execution time of the function. For quick'n'dirty profiling."""
|
||||
|
||||
def newfunc (*args, **kwargs):
|
||||
def newfunc(*args, **kwargs):
|
||||
"""Execute function and print execution time."""
|
||||
t = time.time()
|
||||
res = func(*args, **kwargs)
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ def new_request_session(config, cookies):
|
|||
class Aggregate:
|
||||
"""Store thread-safe data collections for checker threads."""
|
||||
|
||||
def __init__ (self, config, urlqueue, robots_txt, plugin_manager,
|
||||
def __init__(self, config, urlqueue, robots_txt, plugin_manager,
|
||||
result_cache):
|
||||
"""Store given link checking objects."""
|
||||
self.config = config
|
||||
|
|
@ -105,7 +105,7 @@ class Aggregate:
|
|||
raise LinkCheckerError("No cookies set by login URL %s" % url)
|
||||
|
||||
@synchronized(_threads_lock)
|
||||
def start_threads (self):
|
||||
def start_threads(self):
|
||||
"""Spawn threads for URL checking and status printing."""
|
||||
if self.config["status"]:
|
||||
t = status.Status(self, self.config["status_wait_seconds"])
|
||||
|
|
@ -150,7 +150,7 @@ class Aggregate:
|
|||
self.times[host] = t + wait_time
|
||||
|
||||
@synchronized(_threads_lock)
|
||||
def print_active_threads (self):
|
||||
def print_active_threads(self):
|
||||
"""Log all currently active threads."""
|
||||
debug = log.is_debug(LOG_CHECK)
|
||||
if debug:
|
||||
|
|
@ -174,11 +174,11 @@ class Aggregate:
|
|||
if name.startswith("CheckThread-"):
|
||||
yield name
|
||||
|
||||
def cancel (self):
|
||||
def cancel(self):
|
||||
"""Empty the URL queue."""
|
||||
self.urlqueue.do_shutdown()
|
||||
|
||||
def abort (self):
|
||||
def abort(self):
|
||||
"""Print still-active URLs and empty the URL queue."""
|
||||
self.print_active_threads()
|
||||
self.cancel()
|
||||
|
|
@ -190,12 +190,12 @@ class Aggregate:
|
|||
raise KeyboardInterrupt()
|
||||
|
||||
@synchronized(_threads_lock)
|
||||
def remove_stopped_threads (self):
|
||||
def remove_stopped_threads(self):
|
||||
"""Remove the stopped threads from the internal thread list."""
|
||||
self.threads = [t for t in self.threads if t.is_alive()]
|
||||
|
||||
@synchronized(_threads_lock)
|
||||
def finish (self):
|
||||
def finish(self):
|
||||
"""Wait for checker threads to finish."""
|
||||
if not self.urlqueue.empty():
|
||||
# This happens when all checker threads died.
|
||||
|
|
@ -206,7 +206,7 @@ class Aggregate:
|
|||
t.join(timeout=1.0)
|
||||
|
||||
@synchronized(_threads_lock)
|
||||
def is_finished (self):
|
||||
def is_finished(self):
|
||||
"""Determine if checking is finished."""
|
||||
self.remove_stopped_threads()
|
||||
return self.urlqueue.empty() and not self.threads
|
||||
|
|
|
|||
|
|
@ -80,21 +80,21 @@ def check_url(url_data, logger):
|
|||
class Checker(task.LoggedCheckedTask):
|
||||
"""URL check thread."""
|
||||
|
||||
def __init__ (self, urlqueue, logger, add_request_session):
|
||||
def __init__(self, urlqueue, logger, add_request_session):
|
||||
"""Store URL queue and logger."""
|
||||
super(Checker, self).__init__(logger)
|
||||
self.urlqueue = urlqueue
|
||||
self.origname = self.getName()
|
||||
self.add_request_session = add_request_session
|
||||
|
||||
def run_checked (self):
|
||||
def run_checked(self):
|
||||
"""Check URLs in the queue."""
|
||||
# construct per-thread HTTP/S requests session
|
||||
self.add_request_session()
|
||||
while not self.stopped(0):
|
||||
self.check_url()
|
||||
|
||||
def check_url (self):
|
||||
def check_url(self):
|
||||
"""Try to get URL data from queue and check it."""
|
||||
try:
|
||||
url_data = self.urlqueue.get(timeout=QUEUE_POLL_INTERVALL_SECS)
|
||||
|
|
@ -109,7 +109,7 @@ class Checker(task.LoggedCheckedTask):
|
|||
except Exception:
|
||||
self.internal_error()
|
||||
|
||||
def check_url_data (self, url_data):
|
||||
def check_url_data(self, url_data):
|
||||
"""Check one URL data instance."""
|
||||
if url_data.url is None:
|
||||
url = ""
|
||||
|
|
|
|||
|
|
@ -29,11 +29,11 @@ stdout = i18n.get_encoded_writer()
|
|||
class StatusLogger:
|
||||
"""Standard status logger. Default output is stderr."""
|
||||
|
||||
def __init__ (self, fd=stderr):
|
||||
def __init__(self, fd=stderr):
|
||||
"""Save file descriptor for logging."""
|
||||
self.fd = fd
|
||||
|
||||
def log_status (self, checked, in_progress, queue, duration, num_urls):
|
||||
def log_status(self, checked, in_progress, queue, duration, num_urls):
|
||||
"""Write status message to file descriptor."""
|
||||
msg = _n("%2d thread active", "%2d threads active", in_progress) % \
|
||||
in_progress
|
||||
|
|
@ -48,15 +48,15 @@ class StatusLogger:
|
|||
self.writeln(msg)
|
||||
self.flush()
|
||||
|
||||
def write (self, msg):
|
||||
def write(self, msg):
|
||||
"""Write message to file descriptor."""
|
||||
self.fd.write(msg)
|
||||
|
||||
def writeln (self, msg):
|
||||
def writeln(self, msg):
|
||||
"""Write status message and line break to file descriptor."""
|
||||
self.fd.write("%s%s" % (msg, os.linesep))
|
||||
|
||||
def flush (self):
|
||||
def flush(self):
|
||||
"""Flush file descriptor."""
|
||||
self.fd.flush()
|
||||
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ class Interrupt(task.CheckedTask):
|
|||
"""
|
||||
WaitSeconds = 5
|
||||
|
||||
def __init__ (self, duration):
|
||||
def __init__(self, duration):
|
||||
"""Initialize the task.
|
||||
@param duration: raise KeyboardInterrupt after given number of seconds
|
||||
@ptype duration: int
|
||||
|
|
@ -34,7 +34,7 @@ class Interrupt(task.CheckedTask):
|
|||
super(Interrupt, self).__init__()
|
||||
self.duration = duration
|
||||
|
||||
def run_checked (self):
|
||||
def run_checked(self):
|
||||
"""Wait and raise KeyboardInterrupt after."""
|
||||
self.start_time = time.time()
|
||||
self.setName("Interrupt")
|
||||
|
|
|
|||
|
|
@ -24,28 +24,28 @@ _lock = threading.Lock()
|
|||
class Logger:
|
||||
"""Thread safe multi-logger class used by aggregator instances."""
|
||||
|
||||
def __init__ (self, config):
|
||||
def __init__(self, config):
|
||||
"""Initialize basic logging variables."""
|
||||
self.loggers = [config['logger']]
|
||||
self.loggers.extend(config['fileoutput'])
|
||||
self.verbose = config["verbose"]
|
||||
self.warnings = config["warnings"]
|
||||
|
||||
def start_log_output (self):
|
||||
def start_log_output(self):
|
||||
"""
|
||||
Start output of all configured loggers.
|
||||
"""
|
||||
for logger in self.loggers:
|
||||
logger.start_output()
|
||||
|
||||
def end_log_output (self, **kwargs):
|
||||
def end_log_output(self, **kwargs):
|
||||
"""
|
||||
End output of all configured loggers.
|
||||
"""
|
||||
for logger in self.loggers:
|
||||
logger.end_output(**kwargs)
|
||||
|
||||
def do_print (self, url_data):
|
||||
def do_print(self, url_data):
|
||||
"""Determine if URL entry should be logged or not."""
|
||||
if self.verbose:
|
||||
return True
|
||||
|
|
@ -54,7 +54,7 @@ class Logger:
|
|||
return not url_data.valid
|
||||
|
||||
@synchronized(_lock)
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""Send new url to all configured loggers."""
|
||||
self.check_active_loggers()
|
||||
do_print = self.do_print(url_data)
|
||||
|
|
@ -64,7 +64,7 @@ class Logger:
|
|||
log.log_filter_url(url_data, do_print)
|
||||
|
||||
@synchronized(_lock)
|
||||
def log_internal_error (self):
|
||||
def log_internal_error(self):
|
||||
"""Document that an internal error occurred."""
|
||||
for logger in self.loggers:
|
||||
logger.log_internal_error()
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ from . import task
|
|||
class Status(task.LoggedCheckedTask):
|
||||
"""Thread that gathers and logs the status periodically."""
|
||||
|
||||
def __init__ (self, aggregator, wait_seconds):
|
||||
def __init__(self, aggregator, wait_seconds):
|
||||
"""Initialize the status logger task.
|
||||
@param urlqueue: the URL queue
|
||||
@ptype urlqueue: Urlqueue
|
||||
|
|
@ -36,7 +36,7 @@ class Status(task.LoggedCheckedTask):
|
|||
self.wait_seconds = wait_seconds
|
||||
assert self.wait_seconds >= 1
|
||||
|
||||
def run_checked (self):
|
||||
def run_checked(self):
|
||||
"""Print periodic status messages."""
|
||||
self.start_time = time.time()
|
||||
self.setName("Status")
|
||||
|
|
@ -49,7 +49,7 @@ class Status(task.LoggedCheckedTask):
|
|||
wait_seconds = self.wait_seconds
|
||||
first_wait = False
|
||||
|
||||
def log_status (self):
|
||||
def log_status(self):
|
||||
"""Log a status message."""
|
||||
duration = time.time() - self.start_time
|
||||
checked, in_progress, queue = self.aggregator.urlqueue.status()
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ from . import console
|
|||
class CheckedTask(threader.StoppableThread):
|
||||
"""Stoppable URL check task, handling error conditions while running."""
|
||||
|
||||
def run (self):
|
||||
def run(self):
|
||||
"""Handle keyboard interrupt and other errors."""
|
||||
try:
|
||||
self.run_checked()
|
||||
|
|
@ -33,12 +33,12 @@ class CheckedTask(threader.StoppableThread):
|
|||
self.internal_error()
|
||||
|
||||
@notimplemented
|
||||
def run_checked (self):
|
||||
def run_checked(self):
|
||||
"""Overload in subclass."""
|
||||
pass
|
||||
|
||||
@notimplemented
|
||||
def internal_error (self):
|
||||
def internal_error(self):
|
||||
"""Overload in subclass."""
|
||||
pass
|
||||
|
||||
|
|
@ -46,12 +46,12 @@ class CheckedTask(threader.StoppableThread):
|
|||
class LoggedCheckedTask(CheckedTask):
|
||||
"""URL check task with a logger instance and internal error handling."""
|
||||
|
||||
def __init__ (self, logger):
|
||||
def __init__(self, logger):
|
||||
"""Initialize super instance and store given logger."""
|
||||
super(CheckedTask, self).__init__()
|
||||
self.logger = logger
|
||||
|
||||
def internal_error (self):
|
||||
def internal_error(self):
|
||||
"""Log an internal error on console and the logger."""
|
||||
console.internal_error()
|
||||
self.logger.log_internal_error()
|
||||
|
|
|
|||
|
|
@ -20,55 +20,55 @@ Dummy objects.
|
|||
class Dummy:
|
||||
"""A dummy object ignores all access to it. Useful for testing."""
|
||||
|
||||
def __init__ (self, *args, **kwargs):
|
||||
def __init__(self, *args, **kwargs):
|
||||
"""Return None"""
|
||||
pass
|
||||
|
||||
def __call__ (self, *args, **kwargs):
|
||||
def __call__(self, *args, **kwargs):
|
||||
"""Return self."""
|
||||
return self
|
||||
|
||||
def __getattr__ (self, name):
|
||||
def __getattr__(self, name):
|
||||
"""Return self."""
|
||||
return self
|
||||
|
||||
def __setattr__ (self, name, value):
|
||||
def __setattr__(self, name, value):
|
||||
"""Return None"""
|
||||
pass
|
||||
|
||||
def __delattr__ (self, name):
|
||||
def __delattr__(self, name):
|
||||
"""Return None"""
|
||||
pass
|
||||
|
||||
def __str__ (self):
|
||||
def __str__(self):
|
||||
"""Return 'dummy'"""
|
||||
return "dummy"
|
||||
|
||||
def __repr__ (self):
|
||||
def __repr__(self):
|
||||
"""Return '<dummy>'"""
|
||||
return "<dummy>"
|
||||
|
||||
def __unicode__ (self):
|
||||
def __unicode__(self):
|
||||
"""Return 'dummy'"""
|
||||
return "dummy"
|
||||
|
||||
def __len__ (self):
|
||||
def __len__(self):
|
||||
"""Return zero"""
|
||||
return 0
|
||||
|
||||
def __getitem__ (self, key):
|
||||
def __getitem__(self, key):
|
||||
"""Return self"""
|
||||
return self
|
||||
|
||||
def __setitem__ (self, key, value):
|
||||
def __setitem__(self, key, value):
|
||||
"""Return None"""
|
||||
pass
|
||||
|
||||
def __delitem__ (self, key):
|
||||
def __delitem__(self, key):
|
||||
"""Return None"""
|
||||
pass
|
||||
|
||||
def __contains__ (self, key):
|
||||
def __contains__(self, key):
|
||||
"""Return False"""
|
||||
return False
|
||||
|
||||
|
|
|
|||
|
|
@ -47,14 +47,14 @@ def has_module(name, without_error=True):
|
|||
class GlobDirectoryWalker:
|
||||
"""A forward iterator that traverses a directory tree."""
|
||||
|
||||
def __init__ (self, directory, pattern="*"):
|
||||
def __init__(self, directory, pattern="*"):
|
||||
"""Set start directory and pattern matcher."""
|
||||
self.stack = [directory]
|
||||
self.pattern = pattern
|
||||
self.files = []
|
||||
self.index = 0
|
||||
|
||||
def __getitem__ (self, index):
|
||||
def __getitem__(self, index):
|
||||
"""Search for next filename."""
|
||||
while True:
|
||||
try:
|
||||
|
|
@ -81,22 +81,22 @@ rglob = GlobDirectoryWalker
|
|||
class Buffer:
|
||||
"""Holds buffered data"""
|
||||
|
||||
def __init__ (self, empty=''):
|
||||
def __init__(self, empty=''):
|
||||
"""Initialize buffer."""
|
||||
self.empty = self.buf = empty
|
||||
self.tmpbuf = []
|
||||
self.pos = 0
|
||||
|
||||
def __len__ (self):
|
||||
def __len__(self):
|
||||
"""Buffer length."""
|
||||
return self.pos
|
||||
|
||||
def write (self, data):
|
||||
def write(self, data):
|
||||
"""Write data to buffer."""
|
||||
self.tmpbuf.append(data)
|
||||
self.pos += len(data)
|
||||
|
||||
def flush (self, overlap=0):
|
||||
def flush(self, overlap=0):
|
||||
"""Flush buffered data and return it."""
|
||||
self.buf += self.empty.join(self.tmpbuf)
|
||||
self.tmpbuf = []
|
||||
|
|
|
|||
|
|
@ -123,7 +123,7 @@ class LinkFinder:
|
|||
"""Find HTML links, and apply them to the callback function with the
|
||||
format (url, lineno, column, name, codebase)."""
|
||||
|
||||
def __init__ (self, callback, tags):
|
||||
def __init__(self, callback, tags):
|
||||
"""Store content in buffer and initialize URL list."""
|
||||
self.callback = callback
|
||||
# set universal tag attributes using tagname None
|
||||
|
|
@ -135,7 +135,7 @@ class LinkFinder:
|
|||
self.tags[tag].update(self.universal_attrs)
|
||||
self.base_ref = ''
|
||||
|
||||
def html_element (self, tag, attrs, element_text, lineno, column):
|
||||
def html_element(self, tag, attrs, element_text, lineno, column):
|
||||
"""Search for links and store found URLs in a list."""
|
||||
log.debug(LOG_CHECK, "LinkFinder tag %s attrs %s", tag, attrs)
|
||||
log.debug(LOG_CHECK, "line %d col %d", lineno, column)
|
||||
|
|
@ -166,7 +166,7 @@ class LinkFinder:
|
|||
self.parse_tag(tag, attr, value, name, base, lineno, column)
|
||||
log.debug(LOG_CHECK, "LinkFinder finished tag %s", tag)
|
||||
|
||||
def get_link_name (self, tag, attrs, attr, name=None):
|
||||
def get_link_name(self, tag, attrs, attr, name=None):
|
||||
"""Parse attrs for link name. Return name of link."""
|
||||
if tag == 'a' and attr == 'href':
|
||||
if not name:
|
||||
|
|
@ -179,7 +179,7 @@ class LinkFinder:
|
|||
name = ""
|
||||
return name
|
||||
|
||||
def parse_tag (self, tag, attr, value, name, base, lineno, column):
|
||||
def parse_tag(self, tag, attr, value, name, base, lineno, column):
|
||||
"""Add given url data to url list."""
|
||||
assert isinstance(tag, str_text), repr(tag)
|
||||
assert isinstance(attr, str_text), repr(attr)
|
||||
|
|
|
|||
|
|
@ -41,7 +41,7 @@ class Translator(gettext.GNUTranslations):
|
|||
"""A translation class always installing its gettext methods into the
|
||||
default namespace."""
|
||||
|
||||
def install (self, do_unicode):
|
||||
def install(self, do_unicode):
|
||||
"""Install gettext methods into the default namespace."""
|
||||
install_builtin(self, do_unicode)
|
||||
|
||||
|
|
@ -50,7 +50,7 @@ class NullTranslator(gettext.NullTranslations):
|
|||
"""A dummy translation class always installing its gettext methods into
|
||||
the default namespace."""
|
||||
|
||||
def install (self, do_unicode):
|
||||
def install(self, do_unicode):
|
||||
"""Install gettext methods into the default namespace."""
|
||||
install_builtin(self, do_unicode)
|
||||
|
||||
|
|
|
|||
|
|
@ -99,7 +99,7 @@ class ThreadsafeIO:
|
|||
self.closed = False
|
||||
|
||||
@synchronized(_lock)
|
||||
def write (self, data):
|
||||
def write(self, data):
|
||||
"""Write given unicode data to buffer."""
|
||||
assert isinstance(data, str_text)
|
||||
if self.closed:
|
||||
|
|
@ -108,14 +108,14 @@ class ThreadsafeIO:
|
|||
self.buf.append(data)
|
||||
|
||||
@synchronized(_lock)
|
||||
def get_data (self):
|
||||
def get_data(self):
|
||||
"""Get bufferd unicode data."""
|
||||
data = "".join(self.buf)
|
||||
self.buf = []
|
||||
return data
|
||||
|
||||
@synchronized(_lock)
|
||||
def close (self):
|
||||
def close(self):
|
||||
"""Reset buffer and close this I/O object."""
|
||||
self.buf = []
|
||||
self.closed = True
|
||||
|
|
|
|||
|
|
@ -36,19 +36,19 @@ def get_lock(name, debug=False):
|
|||
class DebugLock:
|
||||
"""Debugging lock class."""
|
||||
|
||||
def __init__ (self, lock, name):
|
||||
def __init__(self, lock, name):
|
||||
"""Store lock and name parameters."""
|
||||
self.lock = lock
|
||||
self.name = name
|
||||
|
||||
def acquire (self, blocking=1):
|
||||
def acquire(self, blocking=1):
|
||||
"""Acquire lock."""
|
||||
threadname = threading.currentThread().getName()
|
||||
log.debug(LOG_THREAD, "Acquire %s for %s", self.name, threadname)
|
||||
self.lock.acquire(blocking)
|
||||
log.debug(LOG_THREAD, "...acquired %s for %s", self.name, threadname)
|
||||
|
||||
def release (self):
|
||||
def release(self):
|
||||
"""Release lock."""
|
||||
threadname = threading.currentThread().getName()
|
||||
log.debug(LOG_THREAD, "Release %s for %s", self.name, threadname)
|
||||
|
|
|
|||
|
|
@ -64,11 +64,11 @@ class LogStatistics:
|
|||
- URL lengths
|
||||
"""
|
||||
|
||||
def __init__ (self):
|
||||
def __init__(self):
|
||||
"""Initialize log statistics."""
|
||||
self.reset()
|
||||
|
||||
def reset (self):
|
||||
def reset(self):
|
||||
"""Reset all log statistics to default values."""
|
||||
# number of logged URLs
|
||||
self.number = 0
|
||||
|
|
@ -92,7 +92,7 @@ class LogStatistics:
|
|||
# overall downloaded bytes
|
||||
self.downloaded_bytes = None
|
||||
|
||||
def log_url (self, url_data, do_print):
|
||||
def log_url(self, url_data, do_print):
|
||||
"""Log URL statistics."""
|
||||
self.number += 1
|
||||
if not url_data.valid:
|
||||
|
|
@ -124,7 +124,7 @@ class LogStatistics:
|
|||
# calculate running average
|
||||
self.avg_url_length += (l - self.avg_url_length) / self.avg_number
|
||||
|
||||
def log_internal_error (self):
|
||||
def log_internal_error(self):
|
||||
"""Increase internal error count."""
|
||||
self.internal_errors += 1
|
||||
|
||||
|
|
@ -164,7 +164,7 @@ class _Logger(abc.ABC):
|
|||
# Default log configuration
|
||||
LoggerArgs = {}
|
||||
|
||||
def __init__ (self, **args):
|
||||
def __init__(self, **args):
|
||||
"""
|
||||
Initialize a logger, looking for part restrictions in kwargs.
|
||||
"""
|
||||
|
|
@ -198,18 +198,18 @@ class _Logger(abc.ABC):
|
|||
args.update(kwargs)
|
||||
return args
|
||||
|
||||
def get_charset_encoding (self):
|
||||
def get_charset_encoding(self):
|
||||
"""Translate the output encoding to a charset encoding name."""
|
||||
if self.output_encoding == "utf-8-sig":
|
||||
return "utf-8"
|
||||
return self.output_encoding
|
||||
|
||||
def encode (self, s):
|
||||
def encode(self, s):
|
||||
"""Encode string with output encoding."""
|
||||
assert isinstance(s, str_text)
|
||||
return s.encode(self.output_encoding, self.codec_errors)
|
||||
|
||||
def init_fileoutput (self, args):
|
||||
def init_fileoutput(self, args):
|
||||
"""
|
||||
Initialize self.fd file descriptor from args. For file output
|
||||
(used when the fileoutput arg is given), the self.fd
|
||||
|
|
@ -226,7 +226,7 @@ class _Logger(abc.ABC):
|
|||
else:
|
||||
self.fd = self.create_fd()
|
||||
|
||||
def start_fileoutput (self):
|
||||
def start_fileoutput(self):
|
||||
"""Start output to configured file."""
|
||||
path = os.path.dirname(self.filename)
|
||||
try:
|
||||
|
|
@ -243,7 +243,7 @@ class _Logger(abc.ABC):
|
|||
self.is_active = False
|
||||
self.filename = None
|
||||
|
||||
def create_fd (self):
|
||||
def create_fd(self):
|
||||
"""Create open file descriptor."""
|
||||
if self.filename is None:
|
||||
return i18n.get_encoded_writer(encoding=self.output_encoding,
|
||||
|
|
@ -251,7 +251,7 @@ class _Logger(abc.ABC):
|
|||
return codecs.open(self.filename, "wb", self.output_encoding,
|
||||
self.codec_errors)
|
||||
|
||||
def close_fileoutput (self):
|
||||
def close_fileoutput(self):
|
||||
"""
|
||||
Flush and close the file output denoted by self.fd.
|
||||
"""
|
||||
|
|
@ -269,7 +269,7 @@ class _Logger(abc.ABC):
|
|||
pass
|
||||
self.fd = None
|
||||
|
||||
def check_date (self):
|
||||
def check_date(self):
|
||||
"""
|
||||
Check for special dates.
|
||||
"""
|
||||
|
|
@ -278,14 +278,14 @@ class _Logger(abc.ABC):
|
|||
msg = _("Happy birthday for LinkChecker, I'm %d years old today!")
|
||||
self.comment(msg % (now.year - 2000))
|
||||
|
||||
def comment (self, s, **args):
|
||||
def comment(self, s, **args):
|
||||
"""
|
||||
Write a comment and a newline. This method just prints
|
||||
the given string.
|
||||
"""
|
||||
self.writeln(s=s, **args)
|
||||
|
||||
def wrap (self, lines, width):
|
||||
def wrap(self, lines, width):
|
||||
"""
|
||||
Return wrapped version of given lines.
|
||||
"""
|
||||
|
|
@ -297,7 +297,7 @@ class _Logger(abc.ABC):
|
|||
break_on_hyphens=False)
|
||||
return strformat.wrap(text, width, **kwargs).lstrip()
|
||||
|
||||
def write (self, s, **args):
|
||||
def write(self, s, **args):
|
||||
"""Write string to output descriptor. Strips control characters
|
||||
from string before writing.
|
||||
"""
|
||||
|
|
@ -318,13 +318,13 @@ class _Logger(abc.ABC):
|
|||
self.fd = dummy.Dummy()
|
||||
self.is_active = False
|
||||
|
||||
def writeln (self, s="", **args):
|
||||
def writeln(self, s="", **args):
|
||||
"""
|
||||
Write string to output descriptor plus a newline.
|
||||
"""
|
||||
self.write("%s%s" % (s, os.linesep), **args)
|
||||
|
||||
def has_part (self, name):
|
||||
def has_part(self, name):
|
||||
"""
|
||||
See if given part name will be logged.
|
||||
"""
|
||||
|
|
@ -333,19 +333,19 @@ class _Logger(abc.ABC):
|
|||
return True
|
||||
return name in self.logparts
|
||||
|
||||
def part (self, name):
|
||||
def part(self, name):
|
||||
"""
|
||||
Return translated part name.
|
||||
"""
|
||||
return _(Fields.get(name, ""))
|
||||
|
||||
def spaces (self, name):
|
||||
def spaces(self, name):
|
||||
"""
|
||||
Return indent of spaces for given part name.
|
||||
"""
|
||||
return self.logspaces[name]
|
||||
|
||||
def start_output (self):
|
||||
def start_output(self):
|
||||
"""
|
||||
Start log output.
|
||||
"""
|
||||
|
|
@ -363,7 +363,7 @@ class _Logger(abc.ABC):
|
|||
self.stats.reset()
|
||||
self.starttime = time.time()
|
||||
|
||||
def log_filter_url (self, url_data, do_print):
|
||||
def log_filter_url(self, url_data, do_print):
|
||||
"""
|
||||
Log a new url with this logger if do_print is True. Else
|
||||
only update accounting data.
|
||||
|
|
@ -372,7 +372,7 @@ class _Logger(abc.ABC):
|
|||
if do_print:
|
||||
self.log_url(url_data)
|
||||
|
||||
def write_intro (self):
|
||||
def write_intro(self):
|
||||
"""Write intro comments."""
|
||||
self.comment(_("created by %(app)s at %(time)s") %
|
||||
{"app": configuration.AppName,
|
||||
|
|
@ -383,7 +383,7 @@ class _Logger(abc.ABC):
|
|||
{'url': configuration.SupportUrl})
|
||||
self.check_date()
|
||||
|
||||
def write_outro (self):
|
||||
def write_outro(self):
|
||||
"""Write outro comments."""
|
||||
self.stoptime = time.time()
|
||||
duration = self.stoptime - self.starttime
|
||||
|
|
@ -392,32 +392,32 @@ class _Logger(abc.ABC):
|
|||
"duration": strformat.strduration_long(duration)})
|
||||
|
||||
@abc.abstractmethod
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""
|
||||
Log a new url with this logger.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def end_output (self, **kwargs):
|
||||
def end_output(self, **kwargs):
|
||||
"""
|
||||
End of output, used for cleanup (eg output buffer flushing).
|
||||
"""
|
||||
pass
|
||||
|
||||
def __str__ (self):
|
||||
def __str__(self):
|
||||
"""
|
||||
Return class name.
|
||||
"""
|
||||
return self.__class__.__name__
|
||||
|
||||
def __repr__ (self):
|
||||
def __repr__(self):
|
||||
"""
|
||||
Return class name.
|
||||
"""
|
||||
return repr(self.__class__.__name__)
|
||||
|
||||
def flush (self):
|
||||
def flush(self):
|
||||
"""
|
||||
If the logger has internal buffers, flush them.
|
||||
Ignore flush I/O errors since we are not responsible for proper
|
||||
|
|
@ -429,7 +429,7 @@ class _Logger(abc.ABC):
|
|||
except (IOError, AttributeError):
|
||||
pass
|
||||
|
||||
def log_internal_error (self):
|
||||
def log_internal_error(self):
|
||||
"""Indicate that an internal error occurred in the program."""
|
||||
log.warn(LOG_CHECK, "internal error occurred")
|
||||
self.stats.log_internal_error()
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ class BlacklistLogger(_Logger):
|
|||
"filename": os.path.join(get_user_data(), "blacklist"),
|
||||
}
|
||||
|
||||
def __init__ (self, **kwargs):
|
||||
def __init__(self, **kwargs):
|
||||
"""Intialize with old blacklist data (if found, else not)."""
|
||||
args = self.get_args(kwargs)
|
||||
super(BlacklistLogger, self).__init__(**args)
|
||||
|
|
@ -45,13 +45,13 @@ class BlacklistLogger(_Logger):
|
|||
if self.filename is not None and os.path.exists(self.filename):
|
||||
self.read_blacklist()
|
||||
|
||||
def comment (self, s, **args):
|
||||
def comment(self, s, **args):
|
||||
"""
|
||||
Write nothing.
|
||||
"""
|
||||
pass
|
||||
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""
|
||||
Put invalid url in blacklist, delete valid url from blacklist.
|
||||
"""
|
||||
|
|
@ -66,13 +66,13 @@ class BlacklistLogger(_Logger):
|
|||
if not url_data.valid:
|
||||
self.blacklist[key] = 1
|
||||
|
||||
def end_output (self, **kwargs):
|
||||
def end_output(self, **kwargs):
|
||||
"""
|
||||
Write blacklist file.
|
||||
"""
|
||||
self.write_blacklist()
|
||||
|
||||
def read_blacklist (self):
|
||||
def read_blacklist(self):
|
||||
"""
|
||||
Read a previously stored blacklist from file fd.
|
||||
"""
|
||||
|
|
@ -85,7 +85,7 @@ class BlacklistLogger(_Logger):
|
|||
value, key = line.split(None, 1)
|
||||
self.blacklist[key] = int(value)
|
||||
|
||||
def write_blacklist (self):
|
||||
def write_blacklist(self):
|
||||
"""
|
||||
Write the blacklist.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ class CSVLogger(_Logger):
|
|||
"dialect": "excel",
|
||||
}
|
||||
|
||||
def __init__ (self, **kwargs):
|
||||
def __init__(self, **kwargs):
|
||||
"""Store default separator and (os dependent) line terminator."""
|
||||
args = self.get_args(kwargs)
|
||||
super(CSVLogger, self).__init__(**args)
|
||||
|
|
@ -55,11 +55,11 @@ class CSVLogger(_Logger):
|
|||
self.dialect = args['dialect']
|
||||
self.linesep = os.linesep
|
||||
|
||||
def comment (self, s, **args):
|
||||
def comment(self, s, **args):
|
||||
"""Write CSV comment."""
|
||||
self.writeln(s="# %s" % s, **args)
|
||||
|
||||
def start_output (self):
|
||||
def start_output(self):
|
||||
"""Write checking start info as csv comment."""
|
||||
super(CSVLogger, self).start_output()
|
||||
row = []
|
||||
|
|
@ -79,7 +79,7 @@ class CSVLogger(_Logger):
|
|||
if row:
|
||||
self.writerow(row)
|
||||
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""Write csv formatted url check info."""
|
||||
row = []
|
||||
if self.has_part("urlname"):
|
||||
|
|
@ -119,7 +119,7 @@ class CSVLogger(_Logger):
|
|||
self.writerow(map(strformat.unicode_safe, row))
|
||||
self.flush()
|
||||
|
||||
def writerow (self, row):
|
||||
def writerow(self, row):
|
||||
"""Write one row in CSV format."""
|
||||
self.writer.writerow(row)
|
||||
# Fetch UTF-8 output from the queue ...
|
||||
|
|
@ -134,7 +134,7 @@ class CSVLogger(_Logger):
|
|||
self.queue.seek(0)
|
||||
self.queue.truncate(0)
|
||||
|
||||
def end_output (self, **kwargs):
|
||||
def end_output(self, **kwargs):
|
||||
"""Write end of checking info as csv comment."""
|
||||
if self.has_part("outro"):
|
||||
self.write_outro()
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ class CustomXMLLogger(xmllog._XMLLogger):
|
|||
"filename": "linkchecker-out.xml",
|
||||
}
|
||||
|
||||
def start_output (self):
|
||||
def start_output(self):
|
||||
"""
|
||||
Write start of checking info as xml comment.
|
||||
"""
|
||||
|
|
@ -42,7 +42,7 @@ class CustomXMLLogger(xmllog._XMLLogger):
|
|||
self.xml_starttag('linkchecker', attrs)
|
||||
self.flush()
|
||||
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""
|
||||
Log URL data in custom XML format.
|
||||
"""
|
||||
|
|
@ -95,7 +95,7 @@ class CustomXMLLogger(xmllog._XMLLogger):
|
|||
self.xml_endtag('urldata')
|
||||
self.flush()
|
||||
|
||||
def end_output (self, **kwargs):
|
||||
def end_output(self, **kwargs):
|
||||
"""
|
||||
Write XML end tag.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ class DOTLogger(_GraphLogger):
|
|||
"encoding": "ascii",
|
||||
}
|
||||
|
||||
def start_output (self):
|
||||
def start_output(self):
|
||||
"""Write start of checking info as DOT comment."""
|
||||
super(DOTLogger, self).start_output()
|
||||
if self.has_part("intro"):
|
||||
|
|
@ -44,12 +44,12 @@ class DOTLogger(_GraphLogger):
|
|||
self.writeln(" ];")
|
||||
self.flush()
|
||||
|
||||
def comment (self, s, **args):
|
||||
def comment(self, s, **args):
|
||||
"""Write DOT comment."""
|
||||
self.write("// ")
|
||||
self.writeln(s=s, **args)
|
||||
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""Write one node."""
|
||||
node = self.get_node(url_data)
|
||||
if node is not None:
|
||||
|
|
@ -66,7 +66,7 @@ class DOTLogger(_GraphLogger):
|
|||
self.writeln(" extern=%d," % node["extern"])
|
||||
self.writeln(" ];")
|
||||
|
||||
def write_edge (self, node):
|
||||
def write_edge(self, node):
|
||||
"""Write edge from parent to node."""
|
||||
source = dotquote(self.nodes[node["parent_url"]]["label"])
|
||||
target = dotquote(node["label"])
|
||||
|
|
@ -76,7 +76,7 @@ class DOTLogger(_GraphLogger):
|
|||
self.writeln(" valid=%d," % node["valid"])
|
||||
self.writeln(" ];")
|
||||
|
||||
def end_graph (self):
|
||||
def end_graph(self):
|
||||
"""Write end of graph marker."""
|
||||
self.writeln("}")
|
||||
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ class GMLLogger(_GraphLogger):
|
|||
"filename": "linkchecker-out.gml",
|
||||
}
|
||||
|
||||
def start_output (self):
|
||||
def start_output(self):
|
||||
"""Write start of checking info as gml comment."""
|
||||
super(GMLLogger, self).start_output()
|
||||
if self.has_part("intro"):
|
||||
|
|
@ -39,11 +39,11 @@ class GMLLogger(_GraphLogger):
|
|||
self.writeln(" directed 1")
|
||||
self.flush()
|
||||
|
||||
def comment (self, s, **args):
|
||||
def comment(self, s, **args):
|
||||
"""Write GML comment."""
|
||||
self.writeln(s='comment "%s"' % s, **args)
|
||||
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""Write one node."""
|
||||
node = self.get_node(url_data)
|
||||
if node:
|
||||
|
|
@ -62,7 +62,7 @@ class GMLLogger(_GraphLogger):
|
|||
self.writeln(" extern %d" % node["extern"])
|
||||
self.writeln(" ]")
|
||||
|
||||
def write_edge (self, node):
|
||||
def write_edge(self, node):
|
||||
"""Write one edge."""
|
||||
self.writeln(" edge [")
|
||||
self.writeln(' label "%s"' % node["edge"])
|
||||
|
|
@ -72,6 +72,6 @@ class GMLLogger(_GraphLogger):
|
|||
self.writeln(" valid %d" % node["valid"])
|
||||
self.writeln(" ]")
|
||||
|
||||
def end_graph (self):
|
||||
def end_graph(self):
|
||||
"""Write end of graph marker."""
|
||||
self.writeln("]")
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ import re
|
|||
class _GraphLogger(_Logger):
|
||||
"""Provide base method to get node data."""
|
||||
|
||||
def __init__ (self, **kwargs):
|
||||
def __init__(self, **kwargs):
|
||||
"""Initialize graph node list and internal id counter."""
|
||||
args = self.get_args(kwargs)
|
||||
super(_GraphLogger, self).__init__(**args)
|
||||
|
|
@ -41,7 +41,7 @@ class _GraphLogger(_Logger):
|
|||
if url_data.valid:
|
||||
self.log_url(url_data)
|
||||
|
||||
def get_node (self, url_data):
|
||||
def get_node(self, url_data):
|
||||
"""Return new node data or None if node already exists."""
|
||||
if not url_data.url:
|
||||
return None
|
||||
|
|
@ -63,7 +63,7 @@ class _GraphLogger(_Logger):
|
|||
self.nodeid += 1
|
||||
return node
|
||||
|
||||
def write_edges (self):
|
||||
def write_edges(self):
|
||||
"""
|
||||
Write all edges we can find in the graph in a brute-force manner.
|
||||
"""
|
||||
|
|
@ -73,16 +73,16 @@ class _GraphLogger(_Logger):
|
|||
self.flush()
|
||||
|
||||
@notimplemented
|
||||
def write_edge (self, node):
|
||||
def write_edge(self, node):
|
||||
"""Write edge data for one node and its parent."""
|
||||
pass
|
||||
|
||||
@notimplemented
|
||||
def end_graph (self):
|
||||
def end_graph(self):
|
||||
"""Write end-of-graph marker."""
|
||||
pass
|
||||
|
||||
def end_output (self, **kwargs):
|
||||
def end_output(self, **kwargs):
|
||||
"""Write edges and end of checking info as gml comment."""
|
||||
self.write_edges()
|
||||
self.end_graph()
|
||||
|
|
|
|||
|
|
@ -31,14 +31,14 @@ class GraphXMLLogger(_XMLLogger, _GraphLogger):
|
|||
"filename": "linkchecker-out.gxml",
|
||||
}
|
||||
|
||||
def __init__ (self, **kwargs):
|
||||
def __init__(self, **kwargs):
|
||||
"""Initialize graph node list and internal id counter."""
|
||||
args = self.get_args(kwargs)
|
||||
super(GraphXMLLogger, self).__init__(**args)
|
||||
self.nodes = {}
|
||||
self.nodeid = 0
|
||||
|
||||
def start_output (self):
|
||||
def start_output(self):
|
||||
"""Write start of checking info as xml comment."""
|
||||
super(GraphXMLLogger, self).start_output()
|
||||
self.xml_start_output()
|
||||
|
|
@ -46,7 +46,7 @@ class GraphXMLLogger(_XMLLogger, _GraphLogger):
|
|||
self.xml_starttag('graph', attrs={"isDirected": "true"})
|
||||
self.flush()
|
||||
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""Write one node and all possible edges."""
|
||||
node = self.get_node(url_data)
|
||||
if node:
|
||||
|
|
@ -66,7 +66,7 @@ class GraphXMLLogger(_XMLLogger, _GraphLogger):
|
|||
self.xml_endtag("data")
|
||||
self.xml_endtag("node")
|
||||
|
||||
def write_edge (self, node):
|
||||
def write_edge(self, node):
|
||||
"""Write one edge."""
|
||||
attrs = {
|
||||
"source": "%d" % self.nodes[node["parent_url"]]["id"],
|
||||
|
|
@ -80,7 +80,7 @@ class GraphXMLLogger(_XMLLogger, _GraphLogger):
|
|||
self.xml_endtag("data")
|
||||
self.xml_endtag("edge")
|
||||
|
||||
def end_output (self, **kwargs):
|
||||
def end_output(self, **kwargs):
|
||||
"""Finish graph output, and print end of checking info as xml
|
||||
comment."""
|
||||
self.xml_endtag("graph")
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ class HtmlLogger(_Logger):
|
|||
'colorok': '#3ba557',
|
||||
}
|
||||
|
||||
def __init__ (self, **kwargs):
|
||||
def __init__(self, **kwargs):
|
||||
"""Initialize default HTML color values."""
|
||||
args = self.get_args(kwargs)
|
||||
super(HtmlLogger, self).__init__(**args)
|
||||
|
|
@ -88,17 +88,17 @@ class HtmlLogger(_Logger):
|
|||
self.colorerror = args['colorerror']
|
||||
self.colorok = args['colorok']
|
||||
|
||||
def part (self, name):
|
||||
def part(self, name):
|
||||
"""Return non-space-breakable part name."""
|
||||
return super(HtmlLogger, self).part(name).replace(" ", "&nbsp;")
|
||||
|
||||
def comment (self, s, **args):
|
||||
def comment(self, s, **args):
|
||||
"""Write HTML comment."""
|
||||
self.write("<!-- ")
|
||||
self.write(s, **args)
|
||||
self.write(" -->")
|
||||
|
||||
def start_output (self):
|
||||
def start_output(self):
|
||||
"""Write start of checking info."""
|
||||
super(HtmlLogger, self).start_output()
|
||||
header = {
|
||||
|
|
@ -125,7 +125,7 @@ class HtmlLogger(_Logger):
|
|||
self.check_date()
|
||||
self.flush()
|
||||
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""Write url checking info as HTML."""
|
||||
self.write_table_start()
|
||||
if self.has_part("url"):
|
||||
|
|
@ -155,21 +155,21 @@ class HtmlLogger(_Logger):
|
|||
self.write_table_end()
|
||||
self.flush()
|
||||
|
||||
def write_table_start (self):
|
||||
def write_table_start(self):
|
||||
"""Start html table."""
|
||||
self.writeln('<br/><br/><table>')
|
||||
|
||||
def write_table_end (self):
|
||||
def write_table_end(self):
|
||||
"""End html table."""
|
||||
self.write('</table><br/>')
|
||||
|
||||
def write_id (self):
|
||||
def write_id(self):
|
||||
"""Write ID for current URL."""
|
||||
self.writeln("<tr>")
|
||||
self.writeln('<td>%s</td>' % self.part("id"))
|
||||
self.write("<td>%d</td></tr>" % self.stats.number)
|
||||
|
||||
def write_url (self, url_data):
|
||||
def write_url(self, url_data):
|
||||
"""Write url_data.base_url."""
|
||||
self.writeln("<tr>")
|
||||
self.writeln('<td class="url">%s</td>' % self.part("url"))
|
||||
|
|
@ -177,12 +177,12 @@ class HtmlLogger(_Logger):
|
|||
self.write("`%s'" % html.escape(url_data.base_url))
|
||||
self.writeln("</td></tr>")
|
||||
|
||||
def write_name (self, url_data):
|
||||
def write_name(self, url_data):
|
||||
"""Write url_data.name."""
|
||||
args = (self.part("name"), html.escape(url_data.name))
|
||||
self.writeln("<tr><td>%s</td><td>`%s'</td></tr>" % args)
|
||||
|
||||
def write_parent (self, url_data):
|
||||
def write_parent(self, url_data):
|
||||
"""Write url_data.parent_url."""
|
||||
self.write("<tr><td>"+self.part("parenturl")+
|
||||
'</td><td><a target="top" href="'+
|
||||
|
|
@ -203,35 +203,35 @@ class HtmlLogger(_Logger):
|
|||
self.write('(<a href="'+vcss+'">CSS</a>)')
|
||||
self.writeln("</td></tr>")
|
||||
|
||||
def write_base (self, url_data):
|
||||
def write_base(self, url_data):
|
||||
"""Write url_data.base_ref."""
|
||||
self.writeln("<tr><td>"+self.part("base")+"</td><td>"+
|
||||
html.escape(url_data.base_ref)+"</td></tr>")
|
||||
|
||||
def write_real (self, url_data):
|
||||
def write_real(self, url_data):
|
||||
"""Write url_data.url."""
|
||||
self.writeln("<tr><td>"+self.part("realurl")+"</td><td>"+
|
||||
'<a target="top" href="'+url_data.url+
|
||||
'">'+html.escape(url_data.url)+"</a></td></tr>")
|
||||
|
||||
def write_dltime (self, url_data):
|
||||
def write_dltime(self, url_data):
|
||||
"""Write url_data.dltime."""
|
||||
self.writeln("<tr><td>"+self.part("dltime")+"</td><td>"+
|
||||
(_("%.3f seconds") % url_data.dltime)+
|
||||
"</td></tr>")
|
||||
|
||||
def write_size (self, url_data):
|
||||
def write_size(self, url_data):
|
||||
"""Write url_data.size."""
|
||||
self.writeln("<tr><td>"+self.part("dlsize")+"</td><td>"+
|
||||
strformat.strsize(url_data.size)+
|
||||
"</td></tr>")
|
||||
|
||||
def write_checktime (self, url_data):
|
||||
def write_checktime(self, url_data):
|
||||
"""Write url_data.checktime."""
|
||||
self.writeln("<tr><td>"+self.part("checktime")+"</td><td>"+
|
||||
(_("%.3f seconds") % url_data.checktime)+"</td></tr>")
|
||||
|
||||
def write_info (self, url_data):
|
||||
def write_info(self, url_data):
|
||||
"""Write url_data.info."""
|
||||
sep = "<br/>"+os.linesep
|
||||
text = sep.join(html.escape(x) for x in url_data.info)
|
||||
|
|
@ -244,7 +244,7 @@ class HtmlLogger(_Logger):
|
|||
self.writeln('<tr><td valign="top">' + self.part("modified") +
|
||||
"</td><td>"+text+"</td></tr>")
|
||||
|
||||
def write_warning (self, url_data):
|
||||
def write_warning(self, url_data):
|
||||
"""Write url_data.warnings."""
|
||||
sep = "<br/>"+os.linesep
|
||||
text = sep.join(html.escape(x[1]) for x in url_data.warnings)
|
||||
|
|
@ -252,7 +252,7 @@ class HtmlLogger(_Logger):
|
|||
'valign="top">' + self.part("warning") +
|
||||
'</td><td class="warning">' + text + "</td></tr>")
|
||||
|
||||
def write_result (self, url_data):
|
||||
def write_result(self, url_data):
|
||||
"""Write url_data.result."""
|
||||
if url_data.valid:
|
||||
self.write('<tr><td class="valid">')
|
||||
|
|
@ -268,7 +268,7 @@ class HtmlLogger(_Logger):
|
|||
self.write(": "+html.escape(url_data.result))
|
||||
self.writeln("</td></tr>")
|
||||
|
||||
def write_stats (self):
|
||||
def write_stats(self):
|
||||
"""Write check statistic infos."""
|
||||
self.writeln('<br/><i>%s</i><br/>' % _("Statistics"))
|
||||
if self.stats.number > 0:
|
||||
|
|
@ -285,7 +285,7 @@ class HtmlLogger(_Logger):
|
|||
self.writeln(_("No statistics available since no URLs were checked."))
|
||||
self.writeln("<br/>")
|
||||
|
||||
def write_outro (self):
|
||||
def write_outro(self):
|
||||
"""Write end of check message."""
|
||||
self.writeln("<br/>")
|
||||
self.write(_("That's it.")+" ")
|
||||
|
|
@ -326,7 +326,7 @@ class HtmlLogger(_Logger):
|
|||
configuration.SupportUrl+"</a>.<br/>"))
|
||||
self.writeln("</small></body></html>")
|
||||
|
||||
def end_output (self, **kwargs):
|
||||
def end_output(self, **kwargs):
|
||||
"""Write end of checking info as HTML."""
|
||||
if self.has_part("stats"):
|
||||
self.write_stats()
|
||||
|
|
|
|||
|
|
@ -26,23 +26,23 @@ class NoneLogger(_Logger):
|
|||
|
||||
LoggerName = 'none'
|
||||
|
||||
def comment (self, s, **args):
|
||||
def comment(self, s, **args):
|
||||
"""
|
||||
Do nothing.
|
||||
"""
|
||||
pass
|
||||
|
||||
def start_output (self):
|
||||
def start_output(self):
|
||||
"""
|
||||
Do nothing.
|
||||
"""
|
||||
pass
|
||||
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""Do nothing."""
|
||||
pass
|
||||
|
||||
def end_output (self, **kwargs):
|
||||
def end_output(self, **kwargs):
|
||||
"""
|
||||
Do nothing.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ class SitemapXmlLogger(xmllog._XMLLogger):
|
|||
"encoding": "utf-8",
|
||||
}
|
||||
|
||||
def __init__ (self, **kwargs):
|
||||
def __init__(self, **kwargs):
|
||||
"""Initialize graph node list and internal id counter."""
|
||||
args = self.get_args(kwargs)
|
||||
super(SitemapXmlLogger, self).__init__(**args)
|
||||
|
|
@ -63,7 +63,7 @@ class SitemapXmlLogger(xmllog._XMLLogger):
|
|||
if 'priority' in args:
|
||||
self.priority = float(args['priority'])
|
||||
|
||||
def start_output (self):
|
||||
def start_output(self):
|
||||
"""Write start of checking info as xml comment."""
|
||||
super(SitemapXmlLogger, self).start_output()
|
||||
self.xml_start_output()
|
||||
|
|
@ -101,7 +101,7 @@ class SitemapXmlLogger(xmllog._XMLLogger):
|
|||
and url_data.content_type in HTML_TYPES):
|
||||
self.log_url(url_data, priority=priority)
|
||||
|
||||
def log_url (self, url_data, priority=None):
|
||||
def log_url(self, url_data, priority=None):
|
||||
"""Log URL data in sitemap format."""
|
||||
self.xml_starttag('url')
|
||||
self.xml_tag('loc', url_data.url)
|
||||
|
|
@ -112,7 +112,7 @@ class SitemapXmlLogger(xmllog._XMLLogger):
|
|||
self.xml_endtag('url')
|
||||
self.flush()
|
||||
|
||||
def end_output (self, **kwargs):
|
||||
def end_output(self, **kwargs):
|
||||
"""Write XML end tag."""
|
||||
self.xml_endtag("urlset")
|
||||
self.xml_end_output()
|
||||
|
|
|
|||
|
|
@ -58,7 +58,7 @@ class SQLLogger(_Logger):
|
|||
'dbname': 'linksdb',
|
||||
}
|
||||
|
||||
def __init__ (self, **kwargs):
|
||||
def __init__(self, **kwargs):
|
||||
"""Initialize database access data."""
|
||||
args = self.get_args(kwargs)
|
||||
super(SQLLogger, self).__init__(**args)
|
||||
|
|
@ -66,14 +66,14 @@ class SQLLogger(_Logger):
|
|||
self.dbname = args['dbname']
|
||||
self.separator = args['separator']
|
||||
|
||||
def comment (self, s, **args):
|
||||
def comment(self, s, **args):
|
||||
"""
|
||||
Write SQL comment.
|
||||
"""
|
||||
self.write("-- ")
|
||||
self.writeln(s=s, **args)
|
||||
|
||||
def start_output (self):
|
||||
def start_output(self):
|
||||
"""
|
||||
Write start of checking info as sql comment.
|
||||
"""
|
||||
|
|
@ -83,7 +83,7 @@ class SQLLogger(_Logger):
|
|||
self.writeln()
|
||||
self.flush()
|
||||
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""
|
||||
Store url check info into the database.
|
||||
"""
|
||||
|
|
@ -130,7 +130,7 @@ class SQLLogger(_Logger):
|
|||
})
|
||||
self.flush()
|
||||
|
||||
def end_output (self, **kwargs):
|
||||
def end_output(self, **kwargs):
|
||||
"""
|
||||
Write end of checking info as sql comment.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ class TextLogger(_Logger):
|
|||
'colorreset': "default",
|
||||
}
|
||||
|
||||
def __init__ (self, **kwargs):
|
||||
def __init__(self, **kwargs):
|
||||
"""Initialize error counter and optional file output."""
|
||||
args = self.get_args(kwargs)
|
||||
super(TextLogger, self).__init__(**args)
|
||||
|
|
@ -71,27 +71,27 @@ class TextLogger(_Logger):
|
|||
self.colordlsize = args.get('colordlsize', 'default')
|
||||
self.colorreset = args.get('colorreset', 'default')
|
||||
|
||||
def init_fileoutput (self, args):
|
||||
def init_fileoutput(self, args):
|
||||
"""Colorize file output if possible."""
|
||||
super(TextLogger, self).init_fileoutput(args)
|
||||
if self.fd is not None:
|
||||
self.fd = ansicolor.Colorizer(self.fd)
|
||||
|
||||
def start_fileoutput (self):
|
||||
def start_fileoutput(self):
|
||||
"""Needed to make file descriptor color aware."""
|
||||
init_color = self.fd is None
|
||||
super(TextLogger, self).start_fileoutput()
|
||||
if init_color:
|
||||
self.fd = ansicolor.Colorizer(self.fd)
|
||||
|
||||
def start_output (self):
|
||||
def start_output(self):
|
||||
"""Write generic start checking info."""
|
||||
super(TextLogger, self).start_output()
|
||||
if self.has_part('intro'):
|
||||
self.write_intro()
|
||||
self.flush()
|
||||
|
||||
def write_intro (self):
|
||||
def write_intro(self):
|
||||
"""Log introduction text."""
|
||||
self.writeln(configuration.AppInfo)
|
||||
self.writeln(configuration.Freeware)
|
||||
|
|
@ -104,7 +104,7 @@ class TextLogger(_Logger):
|
|||
self.writeln(_("Start checking at %s") %
|
||||
strformat.strtime(self.starttime))
|
||||
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""Write url checking info."""
|
||||
self.writeln()
|
||||
if self.has_part('url'):
|
||||
|
|
@ -133,24 +133,24 @@ class TextLogger(_Logger):
|
|||
self.write_result(url_data)
|
||||
self.flush()
|
||||
|
||||
def write_id (self):
|
||||
def write_id(self):
|
||||
"""Write unique ID of url_data."""
|
||||
self.writeln()
|
||||
self.write(self.part('id') + self.spaces('id'))
|
||||
self.writeln("%d" % self.stats.number, color=self.colorinfo)
|
||||
|
||||
def write_url (self, url_data):
|
||||
def write_url(self, url_data):
|
||||
"""Write url_data.base_url."""
|
||||
self.write(self.part('url') + self.spaces('url'))
|
||||
txt = strformat.strline(url_data.base_url)
|
||||
self.writeln(txt, color=self.colorurl)
|
||||
|
||||
def write_name (self, url_data):
|
||||
def write_name(self, url_data):
|
||||
"""Write url_data.name."""
|
||||
self.write(self.part("name") + self.spaces("name"))
|
||||
self.writeln(strformat.strline(url_data.name), color=self.colorname)
|
||||
|
||||
def write_parent (self, url_data):
|
||||
def write_parent(self, url_data):
|
||||
"""Write url_data.parent_url."""
|
||||
self.write(self.part('parenturl') + self.spaces("parenturl"))
|
||||
txt = url_data.parent_url
|
||||
|
|
@ -162,35 +162,35 @@ class TextLogger(_Logger):
|
|||
txt += _(", page %d") % url_data.page
|
||||
self.writeln(txt, color=self.colorparent)
|
||||
|
||||
def write_base (self, url_data):
|
||||
def write_base(self, url_data):
|
||||
"""Write url_data.base_ref."""
|
||||
self.write(self.part("base") + self.spaces("base"))
|
||||
self.writeln(url_data.base_ref, color=self.colorbase)
|
||||
|
||||
def write_real (self, url_data):
|
||||
def write_real(self, url_data):
|
||||
"""Write url_data.url."""
|
||||
self.write(self.part("realurl") + self.spaces("realurl"))
|
||||
self.writeln(str_text(url_data.url), color=self.colorreal)
|
||||
|
||||
def write_dltime (self, url_data):
|
||||
def write_dltime(self, url_data):
|
||||
"""Write url_data.dltime."""
|
||||
self.write(self.part("dltime") + self.spaces("dltime"))
|
||||
self.writeln(_("%.3f seconds") % url_data.dltime,
|
||||
color=self.colordltime)
|
||||
|
||||
def write_size (self, url_data):
|
||||
def write_size(self, url_data):
|
||||
"""Write url_data.size."""
|
||||
self.write(self.part("dlsize") + self.spaces("dlsize"))
|
||||
self.writeln(strformat.strsize(url_data.size),
|
||||
color=self.colordlsize)
|
||||
|
||||
def write_checktime (self, url_data):
|
||||
def write_checktime(self, url_data):
|
||||
"""Write url_data.checktime."""
|
||||
self.write(self.part("checktime") + self.spaces("checktime"))
|
||||
self.writeln(_("%.3f seconds") % url_data.checktime,
|
||||
color=self.colordltime)
|
||||
|
||||
def write_info (self, url_data):
|
||||
def write_info(self, url_data):
|
||||
"""Write url_data.info."""
|
||||
self.write(self.part("info") + self.spaces("info"))
|
||||
self.writeln(self.wrap(url_data.info, 65), color=self.colorinfo)
|
||||
|
|
@ -200,13 +200,13 @@ class TextLogger(_Logger):
|
|||
self.write(self.part("modified") + self.spaces("modified"))
|
||||
self.writeln(self.format_modified(url_data.modified))
|
||||
|
||||
def write_warning (self, url_data):
|
||||
def write_warning(self, url_data):
|
||||
"""Write url_data.warning."""
|
||||
self.write(self.part("warning") + self.spaces("warning"))
|
||||
warning_msgs = ["[%s] %s" % x for x in url_data.warnings]
|
||||
self.writeln(self.wrap(warning_msgs, 65), color=self.colorwarning)
|
||||
|
||||
def write_result (self, url_data):
|
||||
def write_result(self, url_data):
|
||||
"""Write url_data.result."""
|
||||
self.write(self.part("result") + self.spaces("result"))
|
||||
if url_data.valid:
|
||||
|
|
@ -219,7 +219,7 @@ class TextLogger(_Logger):
|
|||
self.write(": " + url_data.result, color=color)
|
||||
self.writeln()
|
||||
|
||||
def write_outro (self, interrupt=False):
|
||||
def write_outro(self, interrupt=False):
|
||||
"""Write end of checking message."""
|
||||
self.writeln()
|
||||
if interrupt:
|
||||
|
|
@ -264,7 +264,7 @@ class TextLogger(_Logger):
|
|||
{"time": strformat.strtime(self.stoptime),
|
||||
"duration": strformat.strduration_long(duration)})
|
||||
|
||||
def write_stats (self):
|
||||
def write_stats(self):
|
||||
"""Write check statistic info."""
|
||||
self.writeln()
|
||||
self.writeln(_("Statistics:"))
|
||||
|
|
@ -282,7 +282,7 @@ class TextLogger(_Logger):
|
|||
else:
|
||||
self.writeln(_("No statistics available since no URLs were checked."))
|
||||
|
||||
def end_output (self, **kwargs):
|
||||
def end_output(self, **kwargs):
|
||||
"""Write end of output info, and flush all output buffers."""
|
||||
self.stats.downloaded_bytes = kwargs.get("downloaded_bytes")
|
||||
self.stats.num_urls = kwargs.get("num_urls")
|
||||
|
|
|
|||
|
|
@ -46,7 +46,7 @@ def xmlquoteattr(s):
|
|||
class _XMLLogger(_Logger):
|
||||
"""Base class for XML output; easy to parse with any XML tool."""
|
||||
|
||||
def __init__ (self, **kwargs):
|
||||
def __init__(self, **kwargs):
|
||||
""" Initialize graph node list and internal id counter. """
|
||||
args = self.get_args(kwargs)
|
||||
super(_XMLLogger, self).__init__(**args)
|
||||
|
|
@ -54,7 +54,7 @@ class _XMLLogger(_Logger):
|
|||
self.indent = " "
|
||||
self.level = 0
|
||||
|
||||
def comment (self, s, **args):
|
||||
def comment(self, s, **args):
|
||||
"""
|
||||
Write XML comment.
|
||||
"""
|
||||
|
|
@ -62,7 +62,7 @@ class _XMLLogger(_Logger):
|
|||
self.write(s, **args)
|
||||
self.writeln(" -->")
|
||||
|
||||
def xml_start_output (self):
|
||||
def xml_start_output(self):
|
||||
"""
|
||||
Write start of checking info as xml comment.
|
||||
"""
|
||||
|
|
@ -72,14 +72,14 @@ class _XMLLogger(_Logger):
|
|||
self.write_intro()
|
||||
self.writeln()
|
||||
|
||||
def xml_end_output (self):
|
||||
def xml_end_output(self):
|
||||
"""
|
||||
Write end of checking info as xml comment.
|
||||
"""
|
||||
if self.has_part("outro"):
|
||||
self.write_outro()
|
||||
|
||||
def xml_starttag (self, name, attrs=None):
|
||||
def xml_starttag(self, name, attrs=None):
|
||||
"""
|
||||
Write XML start tag.
|
||||
"""
|
||||
|
|
@ -92,7 +92,7 @@ class _XMLLogger(_Logger):
|
|||
self.writeln(">")
|
||||
self.level += 1
|
||||
|
||||
def xml_endtag (self, name):
|
||||
def xml_endtag(self, name):
|
||||
"""
|
||||
Write XML end tag.
|
||||
"""
|
||||
|
|
@ -101,7 +101,7 @@ class _XMLLogger(_Logger):
|
|||
self.write(self.indent*self.level)
|
||||
self.writeln("</%s>" % xmlquote(name))
|
||||
|
||||
def xml_tag (self, name, content, attrs=None):
|
||||
def xml_tag(self, name, content, attrs=None):
|
||||
"""
|
||||
Write XML tag with content.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ class AnchorCheck(_ContentPlugin):
|
|||
linkparse.AnchorTags)
|
||||
self.check_anchor(url_data)
|
||||
|
||||
def add_anchor (self, url, line, column, name, base):
|
||||
def add_anchor(self, url, line, column, name, base):
|
||||
"""Add anchor URL."""
|
||||
self.anchors.append((url, line, column, name, base))
|
||||
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ class W3Timer:
|
|||
self.last_w3_call = 0
|
||||
|
||||
@synchronized(_w3_time_lock)
|
||||
def check_w3_time (self):
|
||||
def check_w3_time(self):
|
||||
"""Make sure the W3C validators are at most called once a second."""
|
||||
if time.time() - self.last_w3_call < W3Timer.SleepSeconds:
|
||||
time.sleep(W3Timer.SleepSeconds)
|
||||
|
|
|
|||
|
|
@ -73,7 +73,7 @@ class ClamavError(Exception):
|
|||
class ClamdScanner:
|
||||
"""Virus scanner using a clamd daemon process."""
|
||||
|
||||
def __init__ (self, clamav_conf):
|
||||
def __init__(self, clamav_conf):
|
||||
"""Initialize clamd daemon process sockets."""
|
||||
self.infected = []
|
||||
self.errors = []
|
||||
|
|
@ -82,7 +82,7 @@ class ClamdScanner:
|
|||
self.sock.getsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF)
|
||||
self.wsock = self.new_scansock()
|
||||
|
||||
def new_scansock (self):
|
||||
def new_scansock(self):
|
||||
"""Return a connected socket for sending scan data to it."""
|
||||
port = None
|
||||
try:
|
||||
|
|
@ -108,11 +108,11 @@ class ClamdScanner:
|
|||
raise
|
||||
return wsock
|
||||
|
||||
def scan (self, data):
|
||||
def scan(self, data):
|
||||
"""Scan given data for viruses."""
|
||||
self.wsock.sendall(data)
|
||||
|
||||
def close (self):
|
||||
def close(self):
|
||||
"""Get results and close clamd daemon sockets."""
|
||||
self.wsock.close()
|
||||
data = self.sock.recv(self.sock_rcvbuf)
|
||||
|
|
@ -152,7 +152,7 @@ def get_sockinfo(host, port=None):
|
|||
class ClamavConfig(dict):
|
||||
"""Clamav configuration wrapper, with clamd connection method."""
|
||||
|
||||
def __init__ (self, filename):
|
||||
def __init__(self, filename):
|
||||
"""Parse clamav configuration file."""
|
||||
super(ClamavConfig, self).__init__()
|
||||
self.parseconf(filename)
|
||||
|
|
@ -161,7 +161,7 @@ class ClamavConfig(dict):
|
|||
if self.get('TCPSocket') and self.get('LocalSocket'):
|
||||
raise ClamavError(_("only one of TCPSocket and LocalSocket must be enabled"))
|
||||
|
||||
def parseconf (self, filename):
|
||||
def parseconf(self, filename):
|
||||
"""Parse clamav configuration from given file."""
|
||||
with open(filename) as fd:
|
||||
# yet another config format, sigh
|
||||
|
|
@ -176,7 +176,7 @@ class ClamavConfig(dict):
|
|||
else:
|
||||
self[split[0]] = split[1]
|
||||
|
||||
def new_connection (self):
|
||||
def new_connection(self):
|
||||
"""Connect to clamd for stream scanning.
|
||||
|
||||
@return: tuple (connected socket, host)
|
||||
|
|
@ -191,7 +191,7 @@ class ClamavConfig(dict):
|
|||
raise ClamavError(_("one of TCPSocket or LocalSocket must be enabled"))
|
||||
return sock, host
|
||||
|
||||
def create_local_socket (self):
|
||||
def create_local_socket(self):
|
||||
"""Create local socket, connect to it and return socket object."""
|
||||
sock = create_socket(socket.AF_UNIX, socket.SOCK_STREAM)
|
||||
addr = self['LocalSocket']
|
||||
|
|
@ -202,7 +202,7 @@ class ClamavConfig(dict):
|
|||
raise
|
||||
return sock
|
||||
|
||||
def create_tcp_socket (self, host):
|
||||
def create_tcp_socket(self, host):
|
||||
"""Create tcp socket, connect to it and return socket object."""
|
||||
port = int(self['TCPSocket'])
|
||||
sockinfo = get_sockinfo(host, port=port)
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ class RobotFileParser:
|
|||
"""This class provides a set of methods to read, parse and answer
|
||||
questions about a single robots.txt file."""
|
||||
|
||||
def __init__ (self, url='', session=None, proxies=None, auth=None):
|
||||
def __init__(self, url='', session=None, proxies=None, auth=None):
|
||||
"""Initialize internal entry lists and store given url and
|
||||
credentials."""
|
||||
self.set_url(url)
|
||||
|
|
@ -47,7 +47,7 @@ class RobotFileParser:
|
|||
self.auth = auth
|
||||
self._reset()
|
||||
|
||||
def _reset (self):
|
||||
def _reset(self):
|
||||
"""Reset internal flags and entry lists."""
|
||||
self.entries = []
|
||||
self.default_entry = None
|
||||
|
|
@ -58,7 +58,7 @@ class RobotFileParser:
|
|||
self.sitemap_urls = []
|
||||
self.encoding = None
|
||||
|
||||
def mtime (self):
|
||||
def mtime(self):
|
||||
"""Returns the time the robots.txt file was last fetched.
|
||||
|
||||
This is useful for long-running web spiders that need to
|
||||
|
|
@ -69,17 +69,17 @@ class RobotFileParser:
|
|||
"""
|
||||
return self.last_checked
|
||||
|
||||
def modified (self):
|
||||
def modified(self):
|
||||
"""Set the time the robots.txt file was last fetched to the
|
||||
current time."""
|
||||
self.last_checked = time.time()
|
||||
|
||||
def set_url (self, url):
|
||||
def set_url(self, url):
|
||||
"""Set the URL referring to a robots.txt file."""
|
||||
self.url = url
|
||||
self.host, self.path = urllib.parse.urlparse(url)[1:3]
|
||||
|
||||
def read (self):
|
||||
def read(self):
|
||||
"""Read the robots.txt URL and feeds it to the parser."""
|
||||
self._reset()
|
||||
kwargs = dict(
|
||||
|
|
@ -116,7 +116,7 @@ class RobotFileParser:
|
|||
self.allow_all = True
|
||||
log.debug(LOG_CHECK, "%r allow all (request error)", self.url)
|
||||
|
||||
def _add_entry (self, entry):
|
||||
def _add_entry(self, entry):
|
||||
"""Add a parsed entry to entry list.
|
||||
|
||||
@return: None
|
||||
|
|
@ -127,7 +127,7 @@ class RobotFileParser:
|
|||
else:
|
||||
self.entries.append(entry)
|
||||
|
||||
def parse (self, lines):
|
||||
def parse(self, lines):
|
||||
"""Parse the input lines from a robots.txt file.
|
||||
We allow that a user-agent: line is not preceded by
|
||||
one or more blank lines.
|
||||
|
|
@ -210,7 +210,7 @@ class RobotFileParser:
|
|||
self.modified()
|
||||
log.debug(LOG_CHECK, "Parsed rules:\n%s", str(self))
|
||||
|
||||
def can_fetch (self, useragent, url):
|
||||
def can_fetch(self, useragent, url):
|
||||
"""Using the parsed robots.txt decide if useragent can fetch url.
|
||||
|
||||
@return: True if agent can fetch url, else False
|
||||
|
|
@ -240,7 +240,7 @@ class RobotFileParser:
|
|||
log.debug(LOG_CHECK, " ... agent not found, allow.")
|
||||
return True
|
||||
|
||||
def get_crawldelay (self, useragent):
|
||||
def get_crawldelay(self, useragent):
|
||||
"""Look for a configured crawl delay.
|
||||
|
||||
@return: crawl delay in seconds or zero
|
||||
|
|
@ -251,7 +251,7 @@ class RobotFileParser:
|
|||
return entry.crawldelay
|
||||
return 0
|
||||
|
||||
def __str__ (self):
|
||||
def __str__(self):
|
||||
"""Constructs string representation, usable as contents of a
|
||||
robots.txt file.
|
||||
|
||||
|
|
@ -269,7 +269,7 @@ class RuleLine:
|
|||
(allowance==0) followed by a path.
|
||||
"""
|
||||
|
||||
def __init__ (self, path, allowance):
|
||||
def __init__(self, path, allowance):
|
||||
"""Initialize with given path and allowance info."""
|
||||
if path == '' and not allowance:
|
||||
# an empty value means allow all
|
||||
|
|
@ -278,7 +278,7 @@ class RuleLine:
|
|||
self.path = urllib.parse.quote(path)
|
||||
self.allowance = allowance
|
||||
|
||||
def applies_to (self, path):
|
||||
def applies_to(self, path):
|
||||
"""Look if given path applies to this rule.
|
||||
|
||||
@return: True if pathname applies to this rule, else False
|
||||
|
|
@ -286,7 +286,7 @@ class RuleLine:
|
|||
"""
|
||||
return self.path == "*" or path.startswith(self.path)
|
||||
|
||||
def __str__ (self):
|
||||
def __str__(self):
|
||||
"""Construct string representation in robots.txt format.
|
||||
|
||||
@return: robots.txt format
|
||||
|
|
@ -298,13 +298,13 @@ class RuleLine:
|
|||
class Entry:
|
||||
"""An entry has one or more user-agents and zero or more rulelines."""
|
||||
|
||||
def __init__ (self):
|
||||
def __init__(self):
|
||||
"""Initialize user agent and rule list."""
|
||||
self.useragents = []
|
||||
self.rulelines = []
|
||||
self.crawldelay = 0
|
||||
|
||||
def __str__ (self):
|
||||
def __str__(self):
|
||||
"""string representation in robots.txt format.
|
||||
|
||||
@return: robots.txt format
|
||||
|
|
@ -316,7 +316,7 @@ class Entry:
|
|||
lines.extend([str(line) for line in self.rulelines])
|
||||
return "\n".join(lines)
|
||||
|
||||
def applies_to (self, useragent):
|
||||
def applies_to(self, useragent):
|
||||
"""Check if this entry applies to the specified agent.
|
||||
|
||||
@return: True if this entry applies to the agent, else False.
|
||||
|
|
@ -333,7 +333,7 @@ class Entry:
|
|||
return True
|
||||
return False
|
||||
|
||||
def allowance (self, filename):
|
||||
def allowance(self, filename):
|
||||
"""Preconditions:
|
||||
- our agent applies to this entry
|
||||
- filename is URL decoded
|
||||
|
|
|
|||
|
|
@ -23,15 +23,15 @@ class StoppableThread(threading.Thread):
|
|||
"""Thread class with a stop() method. The thread itself has to check
|
||||
regularly for the stopped() condition."""
|
||||
|
||||
def __init__ (self):
|
||||
def __init__(self):
|
||||
"""Store stop event."""
|
||||
super(StoppableThread, self).__init__()
|
||||
self._stopper = threading.Event()
|
||||
|
||||
def stop (self):
|
||||
def stop(self):
|
||||
"""Set stop event."""
|
||||
self._stopper.set()
|
||||
|
||||
def stopped (self, timeout=None):
|
||||
def stopped(self, timeout=None):
|
||||
"""Return True if stop event is set."""
|
||||
return self._stopper.wait(timeout)
|
||||
|
|
|
|||
|
|
@ -196,7 +196,7 @@ file entry:
|
|||
Epilog = "\n".join((Examples, LoggerTypes, RegularExpressions, CookieFormat, ProxySupport, Notes, Retval, Warnings))
|
||||
|
||||
|
||||
def has_encoding (encoding):
|
||||
def has_encoding(encoding):
|
||||
"""Detect if Python can encode in a certain encoding."""
|
||||
try:
|
||||
codecs.lookup(encoding)
|
||||
|
|
@ -376,7 +376,7 @@ if has_argcomplete:
|
|||
argcomplete.autocomplete(argparser)
|
||||
|
||||
|
||||
def read_stdin_urls ():
|
||||
def read_stdin_urls():
|
||||
"""Read list of URLs, separated by white-space, from stdin."""
|
||||
num = 0
|
||||
while True:
|
||||
|
|
|
|||
28
setup.py
28
setup.py
|
|
@ -98,7 +98,7 @@ def get_portable():
|
|||
class MyInstallLib(install_lib):
|
||||
"""Custom library installation."""
|
||||
|
||||
def install (self):
|
||||
def install(self):
|
||||
"""Install the generated config file."""
|
||||
outs = super(MyInstallLib, self).install()
|
||||
infile = self.create_conf_file()
|
||||
|
|
@ -107,7 +107,7 @@ class MyInstallLib(install_lib):
|
|||
outs.append(outfile)
|
||||
return outs
|
||||
|
||||
def create_conf_file (self):
|
||||
def create_conf_file(self):
|
||||
"""Create configuration file."""
|
||||
cmd_obj = self.distribution.get_command_obj("install")
|
||||
cmd_obj.ensure_finalized()
|
||||
|
|
@ -141,11 +141,11 @@ class MyInstallLib(install_lib):
|
|||
self.distribution.create_conf_file(data, directory=self.install_lib)
|
||||
return self.get_conf_output()
|
||||
|
||||
def get_conf_output (self):
|
||||
def get_conf_output(self):
|
||||
"""Get name of configuration file."""
|
||||
return self.distribution.get_conf_filename(self.install_lib)
|
||||
|
||||
def get_outputs (self):
|
||||
def get_outputs(self):
|
||||
"""Add the generated config file to the list of outputs."""
|
||||
outs = super(MyInstallLib, self).get_outputs()
|
||||
conf_output = self.get_conf_output()
|
||||
|
|
@ -158,13 +158,13 @@ class MyInstallLib(install_lib):
|
|||
class MyInstallData(install_data):
|
||||
"""Fix file permissions."""
|
||||
|
||||
def run (self):
|
||||
def run(self):
|
||||
"""Adjust permissions on POSIX systems."""
|
||||
self.install_translations()
|
||||
super(MyInstallData, self).run()
|
||||
self.fix_permissions()
|
||||
|
||||
def install_translations (self):
|
||||
def install_translations(self):
|
||||
"""Install compiled gettext catalogs."""
|
||||
# A hack to fix https://github.com/linkchecker/linkchecker/issues/102
|
||||
i18n_files = []
|
||||
|
|
@ -191,7 +191,7 @@ class MyInstallData(install_data):
|
|||
(out, _) = self.copy_file(data, dest)
|
||||
self.outfiles.append(out)
|
||||
|
||||
def fix_permissions (self):
|
||||
def fix_permissions(self):
|
||||
"""Set correct read permissions on POSIX systems. Might also
|
||||
be possible by setting umask?"""
|
||||
if os.name == 'posix' and not self.dry_run:
|
||||
|
|
@ -208,12 +208,12 @@ class MyInstallData(install_data):
|
|||
class MyDistribution(Distribution):
|
||||
"""Custom distribution class generating config file."""
|
||||
|
||||
def __init__ (self, attrs):
|
||||
def __init__(self, attrs):
|
||||
"""Set console and windows scripts."""
|
||||
super(MyDistribution, self).__init__(attrs)
|
||||
self.console = ['linkchecker']
|
||||
|
||||
def run_commands (self):
|
||||
def run_commands(self):
|
||||
"""Generate config file and run commands."""
|
||||
cwd = os.getcwd()
|
||||
data = []
|
||||
|
|
@ -223,11 +223,11 @@ class MyDistribution(Distribution):
|
|||
self.create_conf_file(data)
|
||||
super(MyDistribution, self).run_commands()
|
||||
|
||||
def get_conf_filename (self, directory):
|
||||
def get_conf_filename(self, directory):
|
||||
"""Get name for config file."""
|
||||
return os.path.join(directory, "_%s_configdata.py" % self.get_name())
|
||||
|
||||
def create_conf_file (self, data, directory=None):
|
||||
def create_conf_file(self, data, directory=None):
|
||||
"""Create local config file from given data (list of lines) in
|
||||
the directory (or current directory if not given)."""
|
||||
data.insert(0, "# this file is automatically created by setup.py")
|
||||
|
|
@ -287,7 +287,7 @@ def check_manifest():
|
|||
class MyBuild(build):
|
||||
"""Custom build command."""
|
||||
|
||||
def run (self):
|
||||
def run(self):
|
||||
"""Check MANIFEST before building."""
|
||||
check_manifest()
|
||||
build.run(self)
|
||||
|
|
@ -296,7 +296,7 @@ class MyBuild(build):
|
|||
class MyClean(clean):
|
||||
"""Custom clean command."""
|
||||
|
||||
def run (self):
|
||||
def run(self):
|
||||
"""Remove share directory on clean."""
|
||||
if self.all:
|
||||
# remove share directory
|
||||
|
|
@ -311,7 +311,7 @@ class MyClean(clean):
|
|||
class MySdist(sdist):
|
||||
"""Custom sdist command."""
|
||||
|
||||
def get_file_list (self):
|
||||
def get_file_list(self):
|
||||
"""Add MANIFEST to the file list."""
|
||||
super(MySdist, self).get_file_list()
|
||||
self.filelist.append("MANIFEST")
|
||||
|
|
|
|||
|
|
@ -59,9 +59,9 @@ def run_silent(cmd):
|
|||
|
||||
def _need_func(testfunc, name):
|
||||
"""Decorator skipping test if given testfunc fails."""
|
||||
def check_func (func):
|
||||
def check_func(func):
|
||||
@wraps(func)
|
||||
def newfunc (*args, **kwargs):
|
||||
def newfunc(*args, **kwargs):
|
||||
if not testfunc():
|
||||
pytest.skip("%s is not available" % name)
|
||||
return func(*args, **kwargs)
|
||||
|
|
@ -185,8 +185,8 @@ def has_newsserver(server):
|
|||
|
||||
def need_newsserver(server):
|
||||
"""Decorator skipping test if newsserver is not available."""
|
||||
def check_func (func):
|
||||
def newfunc (*args, **kwargs):
|
||||
def check_func(func):
|
||||
def newfunc(*args, **kwargs):
|
||||
if not has_newsserver(server):
|
||||
pytest.skip("Newsserver `%s' is not available" % server)
|
||||
return func(*args, **kwargs)
|
||||
|
|
@ -239,8 +239,8 @@ def _limit_time(seconds):
|
|||
|
||||
def limit_time(seconds, skip=False):
|
||||
"""Limit test time to the given number of seconds, else fail or skip."""
|
||||
def run_limited (func):
|
||||
def new_func (*args, **kwargs):
|
||||
def run_limited(func):
|
||||
def new_func(*args, **kwargs):
|
||||
try:
|
||||
with _limit_time(seconds):
|
||||
return func(*args, **kwargs)
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ class TestLogger(linkcheck.logger._Logger):
|
|||
'url',
|
||||
]
|
||||
|
||||
def __init__ (self, **kwargs):
|
||||
def __init__(self, **kwargs):
|
||||
"""
|
||||
The kwargs must have "expected" keyword with the expected logger
|
||||
output lines.
|
||||
|
|
@ -81,13 +81,13 @@ class TestLogger(linkcheck.logger._Logger):
|
|||
flags=re.DOTALL | re.MULTILINE)
|
||||
if x])).splitlines()
|
||||
|
||||
def start_output (self):
|
||||
def start_output(self):
|
||||
"""
|
||||
Nothing to do here.
|
||||
"""
|
||||
pass
|
||||
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""
|
||||
Append logger output to self.result.
|
||||
"""
|
||||
|
|
@ -131,7 +131,7 @@ class TestLogger(linkcheck.logger._Logger):
|
|||
# note: do not append url_data.result since this is
|
||||
# platform dependent
|
||||
|
||||
def end_output (self, linknumber=-1, **kwargs):
|
||||
def end_output(self, linknumber=-1, **kwargs):
|
||||
"""
|
||||
Stores differences between expected and result in self.diff.
|
||||
"""
|
||||
|
|
@ -189,17 +189,17 @@ class LinkCheckTest(unittest.TestCase):
|
|||
"""
|
||||
logger = TestLogger
|
||||
|
||||
def setUp (self):
|
||||
def setUp(self):
|
||||
"""Ensure the current locale setting is the default.
|
||||
Otherwise, warnings will get translated and will break tests."""
|
||||
super(LinkCheckTest, self).setUp()
|
||||
linkcheck.init_i18n(loc='C')
|
||||
|
||||
def norm (self, url, encoding="utf-8"):
|
||||
def norm(self, url, encoding="utf-8"):
|
||||
"""Helper function to norm a url."""
|
||||
return linkcheck.url.url_norm(url, encoding=encoding)[0]
|
||||
|
||||
def get_attrs (self, **kwargs):
|
||||
def get_attrs(self, **kwargs):
|
||||
"""Return current and data directory as dictionary.
|
||||
You can augment the dict with keyword attributes."""
|
||||
d = {
|
||||
|
|
@ -209,7 +209,7 @@ class LinkCheckTest(unittest.TestCase):
|
|||
d.update(kwargs)
|
||||
return d
|
||||
|
||||
def get_resultlines (self, filename):
|
||||
def get_resultlines(self, filename):
|
||||
"""
|
||||
Return contents of file, as list of lines without line endings,
|
||||
ignoring empty lines and lines starting with a hash sign (#).
|
||||
|
|
@ -230,7 +230,7 @@ class LinkCheckTest(unittest.TestCase):
|
|||
"""Get URL for given filename."""
|
||||
return get_file(filename)
|
||||
|
||||
def file_test (self, filename, confargs=None):
|
||||
def file_test(self, filename, confargs=None):
|
||||
"""Check <filename> with expected result in <filename>.result."""
|
||||
url = self.get_url(filename)
|
||||
if confargs is None:
|
||||
|
|
@ -249,7 +249,7 @@ class LinkCheckTest(unittest.TestCase):
|
|||
self.fail("%d internal errors occurred!"
|
||||
% logger.stats.internal_errors)
|
||||
|
||||
def direct (self, url, resultlines, parts=None, recursionlevel=0,
|
||||
def direct(self, url, resultlines, parts=None, recursionlevel=0,
|
||||
confargs=None, url_encoding=None):
|
||||
"""Check url with expected result."""
|
||||
assert isinstance(url, str_text), repr(url)
|
||||
|
|
@ -276,15 +276,15 @@ class LinkCheckTest(unittest.TestCase):
|
|||
class MailTest(LinkCheckTest):
|
||||
"""Test mailto: link checking."""
|
||||
|
||||
def mail_valid (self, addr, **kwargs):
|
||||
def mail_valid(self, addr, **kwargs):
|
||||
"""Test valid mail address."""
|
||||
return self.mail_test(addr, "valid", **kwargs)
|
||||
|
||||
def mail_error (self, addr, **kwargs):
|
||||
def mail_error(self, addr, **kwargs):
|
||||
"""Test error mail address."""
|
||||
return self.mail_test(addr, "error", **kwargs)
|
||||
|
||||
def mail_test (self, addr, result, encoding="utf-8", cache_key=None, warning=None):
|
||||
def mail_test(self, addr, result, encoding="utf-8", cache_key=None, warning=None):
|
||||
"""Test mail address."""
|
||||
url = self.norm(addr, encoding=encoding)
|
||||
if cache_key is None:
|
||||
|
|
|
|||
|
|
@ -29,18 +29,18 @@ TIMEOUT = 5
|
|||
class FtpServerTest(LinkCheckTest):
|
||||
"""Start/stop an FTP server that can be used for testing."""
|
||||
|
||||
def __init__ (self, methodName='runTest'):
|
||||
def __init__(self, methodName='runTest'):
|
||||
"""Init test class and store default ftp server port."""
|
||||
super(FtpServerTest, self).__init__(methodName=methodName)
|
||||
self.host = 'localhost'
|
||||
self.port = None
|
||||
|
||||
def setUp (self):
|
||||
def setUp(self):
|
||||
"""Start a new FTP server in a new thread."""
|
||||
self.port = start_server(self.host, 0)
|
||||
self.assertFalse(self.port is None)
|
||||
|
||||
def tearDown (self):
|
||||
def tearDown(self):
|
||||
"""Send stop request to server."""
|
||||
try:
|
||||
stop_server(self.host, self.port)
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ class StoppableHttpRequestHandler(SimpleHTTPRequestHandler):
|
|||
HTTP request handler with QUIT stopping the server.
|
||||
"""
|
||||
|
||||
def do_QUIT (self):
|
||||
def do_QUIT(self):
|
||||
"""
|
||||
Send 200 OK response, and set server.stop to True.
|
||||
"""
|
||||
|
|
@ -43,7 +43,7 @@ class StoppableHttpRequestHandler(SimpleHTTPRequestHandler):
|
|||
self.end_headers()
|
||||
self.server.stop = True
|
||||
|
||||
def log_message (self, format, *args):
|
||||
def log_message(self, format, *args):
|
||||
"""
|
||||
Logging is disabled.
|
||||
"""
|
||||
|
|
@ -60,7 +60,7 @@ class StoppableHttpServer(HTTPServer):
|
|||
HTTP server that reacts to self.stop flag.
|
||||
"""
|
||||
|
||||
def serve_forever (self):
|
||||
def serve_forever(self):
|
||||
"""
|
||||
Handle one request at a time until stopped.
|
||||
"""
|
||||
|
|
@ -75,7 +75,7 @@ class NoQueryHttpRequestHandler(StoppableHttpRequestHandler):
|
|||
listings.
|
||||
"""
|
||||
|
||||
def remove_path_query (self):
|
||||
def remove_path_query(self):
|
||||
"""
|
||||
Remove everything after a question mark.
|
||||
"""
|
||||
|
|
@ -90,7 +90,7 @@ class NoQueryHttpRequestHandler(StoppableHttpRequestHandler):
|
|||
return status
|
||||
return 500
|
||||
|
||||
def do_GET (self):
|
||||
def do_GET(self):
|
||||
"""
|
||||
Removes query part of GET request.
|
||||
"""
|
||||
|
|
@ -104,7 +104,7 @@ class NoQueryHttpRequestHandler(StoppableHttpRequestHandler):
|
|||
else:
|
||||
super(NoQueryHttpRequestHandler, self).do_GET()
|
||||
|
||||
def do_HEAD (self):
|
||||
def do_HEAD(self):
|
||||
"""
|
||||
Removes query part of HEAD request.
|
||||
"""
|
||||
|
|
@ -152,7 +152,7 @@ class HttpServerTest(LinkCheckTest):
|
|||
Start/stop an HTTP server that can be used for testing.
|
||||
"""
|
||||
|
||||
def __init__ (self, methodName='runTest'):
|
||||
def __init__(self, methodName='runTest'):
|
||||
"""
|
||||
Init test class and store default http server port.
|
||||
"""
|
||||
|
|
@ -246,27 +246,27 @@ def get_cookie(maxage=2000):
|
|||
class CookieRedirectHttpRequestHandler(NoQueryHttpRequestHandler):
|
||||
"""Handler redirecting certain requests, and setting cookies."""
|
||||
|
||||
def end_headers (self):
|
||||
def end_headers(self):
|
||||
"""Send cookie before ending headers."""
|
||||
self.send_header("Set-Cookie", get_cookie())
|
||||
self.send_header("Set-Cookie", get_cookie(maxage=0))
|
||||
super(CookieRedirectHttpRequestHandler, self).end_headers()
|
||||
|
||||
def redirect (self):
|
||||
def redirect(self):
|
||||
"""Redirect request."""
|
||||
path = self.path.replace("redirect", "newurl")
|
||||
self.send_response(302)
|
||||
self.send_header("Location", path)
|
||||
self.end_headers()
|
||||
|
||||
def redirect_newhost (self):
|
||||
def redirect_newhost(self):
|
||||
"""Redirect request to a new host."""
|
||||
path = "http://www.example.com/"
|
||||
self.send_response(302)
|
||||
self.send_header("Location", path)
|
||||
self.end_headers()
|
||||
|
||||
def redirect_newscheme (self):
|
||||
def redirect_newscheme(self):
|
||||
"""Redirect request to a new scheme."""
|
||||
if "file" in self.path:
|
||||
path = "file:README.md"
|
||||
|
|
@ -276,7 +276,7 @@ class CookieRedirectHttpRequestHandler(NoQueryHttpRequestHandler):
|
|||
self.send_header("Location", path)
|
||||
self.end_headers()
|
||||
|
||||
def do_GET (self):
|
||||
def do_GET(self):
|
||||
"""Handle redirections for GET."""
|
||||
if "redirect_newscheme" in self.path:
|
||||
self.redirect_newscheme()
|
||||
|
|
@ -287,7 +287,7 @@ class CookieRedirectHttpRequestHandler(NoQueryHttpRequestHandler):
|
|||
else:
|
||||
super(CookieRedirectHttpRequestHandler, self).do_GET()
|
||||
|
||||
def do_HEAD (self):
|
||||
def do_HEAD(self):
|
||||
"""Handle redirections for HEAD."""
|
||||
if "redirect_newscheme" in self.path:
|
||||
self.redirect_newscheme()
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ TIMEOUT = 5
|
|||
class TelnetServerTest(LinkCheckTest):
|
||||
"""Start/stop a Telnet server that can be used for testing."""
|
||||
|
||||
def __init__ (self, methodName='runTest'):
|
||||
def __init__(self, methodName='runTest'):
|
||||
"""Init test class and store default telnet server port."""
|
||||
super(TelnetServerTest, self).__init__(methodName=methodName)
|
||||
self.host = 'localhost'
|
||||
|
|
@ -46,7 +46,7 @@ class TelnetServerTest(LinkCheckTest):
|
|||
netloc = self.host
|
||||
return "telnet://%s:%d" % (netloc, self.port)
|
||||
|
||||
def setUp (self):
|
||||
def setUp(self):
|
||||
"""Start a new Telnet server in a new thread."""
|
||||
self.port, self.server_thread = start_server(self.host, 0, self.stop_event)
|
||||
self.assertFalse(self.port is None)
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ class TestAnchor(LinkCheckTest):
|
|||
Test anchor checking of HTML pages.
|
||||
"""
|
||||
|
||||
def test_anchor (self):
|
||||
def test_anchor(self):
|
||||
confargs = {"enabledplugins": ["AnchorCheck"]}
|
||||
url = "file://%(curdir)s/%(datadir)s/anchor.html" % self.get_attrs()
|
||||
nurl = self.norm(url)
|
||||
|
|
|
|||
|
|
@ -24,14 +24,14 @@ class TestBase(LinkCheckTest):
|
|||
Test links of base*.html files.
|
||||
"""
|
||||
|
||||
def test_base1 (self):
|
||||
def test_base1(self):
|
||||
self.file_test("base1.html")
|
||||
|
||||
def test_base2 (self):
|
||||
def test_base2(self):
|
||||
self.file_test("base2.html")
|
||||
|
||||
def test_base3 (self):
|
||||
def test_base3(self):
|
||||
self.file_test("base3.html")
|
||||
|
||||
def test_base4 (self):
|
||||
def test_base4(self):
|
||||
self.file_test("base4.html")
|
||||
|
|
|
|||
|
|
@ -27,27 +27,27 @@ class TestBookmarks(LinkCheckTest):
|
|||
"""
|
||||
|
||||
@need_network
|
||||
def test_firefox_bookmarks (self):
|
||||
def test_firefox_bookmarks(self):
|
||||
# firefox 3 bookmark file parsing
|
||||
self.file_test("places.sqlite")
|
||||
|
||||
@need_network
|
||||
def test_opera_bookmarks (self):
|
||||
def test_opera_bookmarks(self):
|
||||
# Opera bookmark file parsing
|
||||
self.file_test("opera6.adr")
|
||||
|
||||
@need_network
|
||||
def test_chromium_bookmarks (self):
|
||||
def test_chromium_bookmarks(self):
|
||||
# Chromium and Google Chrome bookmark file parsing
|
||||
self.file_test("Bookmarks")
|
||||
|
||||
@need_network
|
||||
def test_safari_bookmarks_xml (self):
|
||||
def test_safari_bookmarks_xml(self):
|
||||
# Safari bookmark file parsing (for plaintext plist files)
|
||||
self.file_test(os.path.join("plist_xml", "Bookmarks.plist"))
|
||||
|
||||
@need_network
|
||||
@need_biplist
|
||||
def test_safari_bookmarks_binary (self):
|
||||
def test_safari_bookmarks_binary(self):
|
||||
# Safari bookmark file parsing (for binary plist files)
|
||||
self.file_test(os.path.join("plist_binary", "Bookmarks.plist"))
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ class TestError(LinkCheckTest):
|
|||
Test unrecognized or syntactically wrong links.
|
||||
"""
|
||||
|
||||
def test_unrecognized (self):
|
||||
def test_unrecognized(self):
|
||||
# Unrecognized scheme
|
||||
url = "hutzli:"
|
||||
attrs = self.get_attrs(url=url)
|
||||
|
|
@ -38,7 +38,7 @@ class TestError(LinkCheckTest):
|
|||
]
|
||||
self.direct(url, resultlines)
|
||||
|
||||
def test_invalid1 (self):
|
||||
def test_invalid1(self):
|
||||
# invalid scheme chars
|
||||
url = "äöü:"
|
||||
attrs = self.get_attrs(url=url)
|
||||
|
|
@ -52,7 +52,7 @@ class TestError(LinkCheckTest):
|
|||
]
|
||||
self.direct(url, resultlines)
|
||||
|
||||
def test_invalid2 (self):
|
||||
def test_invalid2(self):
|
||||
# missing scheme alltogether
|
||||
url = "äöü"
|
||||
attrs = self.get_attrs(url=url)
|
||||
|
|
@ -66,7 +66,7 @@ class TestError(LinkCheckTest):
|
|||
]
|
||||
self.direct(url, resultlines)
|
||||
|
||||
def test_invalid3 (self):
|
||||
def test_invalid3(self):
|
||||
# really fucked up
|
||||
url = "@³²¼][½ ³@] ¬½"
|
||||
attrs = self.get_attrs(url=url)
|
||||
|
|
|
|||
|
|
@ -47,29 +47,29 @@ class TestFile(LinkCheckTest):
|
|||
Test file:// link checking (and file content parsing).
|
||||
"""
|
||||
|
||||
def test_html (self):
|
||||
def test_html(self):
|
||||
self.file_test("file.html")
|
||||
|
||||
def test_html_url_quote (self):
|
||||
def test_html_url_quote(self):
|
||||
self.file_test("file_url_quote.html")
|
||||
|
||||
def test_wml (self):
|
||||
def test_wml(self):
|
||||
self.file_test("file.wml")
|
||||
|
||||
def test_text (self):
|
||||
def test_text(self):
|
||||
self.file_test("file.txt")
|
||||
|
||||
def test_asc (self):
|
||||
def test_asc(self):
|
||||
self.file_test("file.asc")
|
||||
|
||||
def test_css (self):
|
||||
def test_css(self):
|
||||
self.file_test("file.css")
|
||||
|
||||
def test_php (self):
|
||||
def test_php(self):
|
||||
self.file_test("file.php")
|
||||
|
||||
@need_word
|
||||
def test_word (self):
|
||||
def test_word(self):
|
||||
confargs = dict(enabledplugins=["WordParser"])
|
||||
self.file_test("file.doc", confargs=confargs)
|
||||
|
||||
|
|
@ -82,11 +82,11 @@ class TestFile(LinkCheckTest):
|
|||
confargs = dict(enabledplugins=["MarkdownCheck"])
|
||||
self.file_test("file.markdown", confargs=confargs)
|
||||
|
||||
def test_urllist (self):
|
||||
def test_urllist(self):
|
||||
self.file_test("urllist.txt")
|
||||
|
||||
@pytest.mark.xfail
|
||||
def test_directory_listing (self):
|
||||
def test_directory_listing(self):
|
||||
# unpack non-unicode filename which cannot be stored
|
||||
# in the SF subversion repository
|
||||
if os.name != 'posix' or sys.platform != 'linux2':
|
||||
|
|
@ -96,11 +96,11 @@ class TestFile(LinkCheckTest):
|
|||
unzip(dirname + ".zip", os.path.dirname(dirname))
|
||||
self.file_test("dir")
|
||||
|
||||
def test_unicode_filename (self):
|
||||
def test_unicode_filename(self):
|
||||
# a unicode filename
|
||||
self.file_test("Мошкова.bin")
|
||||
|
||||
def test_good_file (self):
|
||||
def test_good_file(self):
|
||||
url = "file://%(curdir)s/%(datadir)s/file.txt" % self.get_attrs()
|
||||
nurl = self.norm(url)
|
||||
resultlines = [
|
||||
|
|
@ -111,7 +111,7 @@ class TestFile(LinkCheckTest):
|
|||
]
|
||||
self.direct(url, resultlines)
|
||||
|
||||
def test_bad_file (self):
|
||||
def test_bad_file(self):
|
||||
if os.name == 'nt':
|
||||
# Fails on NT platforms and I am too lazy to fix
|
||||
# Cause: url get quoted %7C which gets lowercased to
|
||||
|
|
@ -127,7 +127,7 @@ class TestFile(LinkCheckTest):
|
|||
]
|
||||
self.direct(url, resultlines)
|
||||
|
||||
def test_good_file_missing_dslash (self):
|
||||
def test_good_file_missing_dslash(self):
|
||||
# good file (missing double slash)
|
||||
attrs = self.get_attrs()
|
||||
url = "file:%(curdir)s/%(datadir)s/file.txt" % attrs
|
||||
|
|
@ -139,7 +139,7 @@ class TestFile(LinkCheckTest):
|
|||
]
|
||||
self.direct(url, resultlines)
|
||||
|
||||
def test_good_dir (self):
|
||||
def test_good_dir(self):
|
||||
url = "file://%(curdir)s/%(datadir)s/" % self.get_attrs()
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
|
|
@ -149,7 +149,7 @@ class TestFile(LinkCheckTest):
|
|||
]
|
||||
self.direct(url, resultlines)
|
||||
|
||||
def test_good_dir_space (self):
|
||||
def test_good_dir_space(self):
|
||||
url = "file://%(curdir)s/%(datadir)s/a b/" % self.get_attrs()
|
||||
nurl = self.norm(url)
|
||||
url2 = "file://%(curdir)s/%(datadir)s/a b/el.html" % self.get_attrs()
|
||||
|
|
|
|||
|
|
@ -24,5 +24,5 @@ class TestFrames(LinkCheckTest):
|
|||
Test link checking of HTML framesets.
|
||||
"""
|
||||
|
||||
def test_frames (self):
|
||||
def test_frames(self):
|
||||
self.file_test("frames.html")
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ class TestFtp(FtpServerTest):
|
|||
"""Test ftp: link checking."""
|
||||
|
||||
@need_pyftpdlib
|
||||
def test_ftp (self):
|
||||
def test_ftp(self):
|
||||
# ftp two slashes
|
||||
url = "ftp://%s:%d/" % (self.host, self.port)
|
||||
resultlines = [
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ class TestHttp(HttpServerTest):
|
|||
self.handler = CookieRedirectHttpRequestHandler
|
||||
|
||||
@need_network
|
||||
def test_html (self):
|
||||
def test_html(self):
|
||||
confargs = dict(recursionlevel=1)
|
||||
self.file_test("http.html", confargs=confargs)
|
||||
self.file_test("http_lowercase.html", confargs=confargs)
|
||||
|
|
|
|||
|
|
@ -23,11 +23,11 @@ class TestHttpMisc(HttpServerTest):
|
|||
"""Test http:// misc link checking."""
|
||||
|
||||
@need_network
|
||||
def test_html (self):
|
||||
def test_html(self):
|
||||
self.swf_test()
|
||||
self.file_test("sitemap.xml")
|
||||
|
||||
def swf_test (self):
|
||||
def swf_test(self):
|
||||
url = self.get_url("test.swf")
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
|
|
|
|||
|
|
@ -27,14 +27,14 @@ class TestHttpRedirect(HttpServerTest):
|
|||
self.handler = CookieRedirectHttpRequestHandler
|
||||
|
||||
@need_network
|
||||
def test_redirect (self):
|
||||
def test_redirect(self):
|
||||
self.redirect1()
|
||||
self.redirect2()
|
||||
self.redirect3()
|
||||
self.redirect4()
|
||||
self.redirect5()
|
||||
|
||||
def redirect1 (self):
|
||||
def redirect1(self):
|
||||
url = "http://localhost:%d/redirect1" % self.port
|
||||
nurl = url
|
||||
rurl = url.replace("redirect", "newurl")
|
||||
|
|
@ -47,7 +47,7 @@ class TestHttpRedirect(HttpServerTest):
|
|||
]
|
||||
self.direct(url, resultlines, recursionlevel=0)
|
||||
|
||||
def redirect2 (self):
|
||||
def redirect2(self):
|
||||
url = "http://localhost:%d/tests/checker/data/redirect.html" % \
|
||||
self.port
|
||||
nurl = url
|
||||
|
|
@ -61,12 +61,12 @@ class TestHttpRedirect(HttpServerTest):
|
|||
]
|
||||
self.direct(url, resultlines, recursionlevel=99)
|
||||
|
||||
def redirect3 (self):
|
||||
def redirect3(self):
|
||||
url = "http://localhost:%d/tests/checker/data/redir.html" % self.port
|
||||
resultlines = self.get_resultlines("redir.html")
|
||||
self.direct(url, resultlines, recursionlevel=1)
|
||||
|
||||
def redirect4 (self):
|
||||
def redirect4(self):
|
||||
url = "http://localhost:%d/redirect_newscheme_ftp" % self.port
|
||||
nurl = url
|
||||
#rurl = "ftp://example.com/"
|
||||
|
|
@ -84,7 +84,7 @@ class TestHttpRedirect(HttpServerTest):
|
|||
]
|
||||
self.direct(url, resultlines, recursionlevel=99)
|
||||
|
||||
def redirect5 (self):
|
||||
def redirect5(self):
|
||||
url = "http://localhost:%d/redirect_newscheme_file" % self.port
|
||||
nurl = url
|
||||
#rurl = "file:README"
|
||||
|
|
|
|||
|
|
@ -21,11 +21,11 @@ from .httpserver import HttpServerTest
|
|||
class TestHttpRobots(HttpServerTest):
|
||||
"""Test robots.txt link checking behaviour."""
|
||||
|
||||
def test_html (self):
|
||||
def test_html(self):
|
||||
self.robots_txt_test()
|
||||
self.robots_txt2_test()
|
||||
|
||||
def robots_txt_test (self):
|
||||
def robots_txt_test(self):
|
||||
url = "http://localhost:%d/robots.txt" % self.port
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
|
|
@ -35,7 +35,7 @@ class TestHttpRobots(HttpServerTest):
|
|||
]
|
||||
self.direct(url, resultlines, recursionlevel=5)
|
||||
|
||||
def robots_txt2_test (self):
|
||||
def robots_txt2_test(self):
|
||||
url = "http://localhost:%d/secret" % self.port
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ class TestHttps(HttpsServerTest):
|
|||
f.write(crypto.dump_certificate(crypto.FILETYPE_PEM, cert))
|
||||
|
||||
@need_network
|
||||
def test_https (self):
|
||||
def test_https(self):
|
||||
url = self.get_url("")
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ class TestHttpsRedirect(HttpServerTest):
|
|||
super(TestHttpsRedirect, self).__init__(methodName=methodName)
|
||||
self.handler = RedirectHttpsRequestHandler
|
||||
|
||||
def test_redirect (self):
|
||||
def test_redirect(self):
|
||||
url = "http://localhost:%d/redirect1" % self.port
|
||||
nurl = url
|
||||
#rurl = "https://localhost:%d/newurl1" % self.port
|
||||
|
|
@ -46,7 +46,7 @@ class TestHttpsRedirect(HttpServerTest):
|
|||
|
||||
class RedirectHttpsRequestHandler(CookieRedirectHttpRequestHandler):
|
||||
|
||||
def redirect (self):
|
||||
def redirect(self):
|
||||
"""Redirect request."""
|
||||
path = self.path.replace("redirect", "newurl")
|
||||
port = self.server.server_address[1]
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ from .__init__ import LinkCheckTest, get_url_from
|
|||
class TestInternpat(LinkCheckTest):
|
||||
"""Test internal pattern."""
|
||||
|
||||
def test_trailing_slash (self):
|
||||
def test_trailing_slash(self):
|
||||
# Make sure a trailing slash is not lost
|
||||
config = linkcheck.configuration.Configuration()
|
||||
aggregate = linkcheck.director.get_aggregate(config)
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ from . import MailTest
|
|||
class TestMailBad(MailTest):
|
||||
"""Test mailto: link checking."""
|
||||
|
||||
def test_error_mail (self):
|
||||
def test_error_mail(self):
|
||||
# too long or too short
|
||||
self.mail_error("mailto:@")
|
||||
self.mail_error("mailto:@example.org")
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ class TestMailGood(MailTest):
|
|||
"""
|
||||
|
||||
@need_network
|
||||
def test_good_mail (self):
|
||||
def test_good_mail(self):
|
||||
# some good mailto addrs
|
||||
url = self.norm("mailto:Dude <calvin@users.sourceforge.net> , "\
|
||||
"Killer <calvin@users.sourceforge.net>?subject=bla")
|
||||
|
|
@ -84,7 +84,7 @@ class TestMailGood(MailTest):
|
|||
self.direct(url, resultlines)
|
||||
|
||||
@need_network
|
||||
def test_warn_mail (self):
|
||||
def test_warn_mail(self):
|
||||
# some mailto addrs with warnings
|
||||
# contains non-quoted characters
|
||||
url = "mailto:calvin@users.sourceforge.net?subject=\xe4\xf6\xfc"
|
||||
|
|
@ -123,27 +123,27 @@ class TestMailGood(MailTest):
|
|||
cache_key=url)
|
||||
|
||||
@need_network
|
||||
def test_valid_mail1 (self):
|
||||
def test_valid_mail1(self):
|
||||
for char in "!#$&'":
|
||||
self._mail_valid_unverified(char)
|
||||
|
||||
@need_network
|
||||
def test_valid_mail2 (self):
|
||||
def test_valid_mail2(self):
|
||||
for char in "*+-/=":
|
||||
self._mail_valid_unverified(char)
|
||||
|
||||
@need_network
|
||||
def test_valid_mail3 (self):
|
||||
def test_valid_mail3(self):
|
||||
for char in "^_`.":
|
||||
self._mail_valid_unverified(char)
|
||||
|
||||
@need_network
|
||||
def test_valid_mail4 (self):
|
||||
def test_valid_mail4(self):
|
||||
for char in "{|}~":
|
||||
self._mail_valid_unverified(char)
|
||||
|
||||
@need_network
|
||||
def test_unicode_mail (self):
|
||||
def test_unicode_mail(self):
|
||||
mailto = "mailto:\xf6lvin@users.sourceforge.net"
|
||||
url = self.norm(mailto, encoding="iso-8859-1")
|
||||
resultlines = [
|
||||
|
|
|
|||
|
|
@ -26,17 +26,17 @@ class TestMisc(LinkCheckTest):
|
|||
"""
|
||||
|
||||
@need_network
|
||||
def test_misc (self):
|
||||
def test_misc(self):
|
||||
self.file_test("misc.html")
|
||||
|
||||
def test_html5 (self):
|
||||
def test_html5(self):
|
||||
self.file_test("html5.html")
|
||||
|
||||
def test_utf8 (self):
|
||||
def test_utf8(self):
|
||||
self.file_test("utf8.html")
|
||||
|
||||
@need_network
|
||||
def test_archive (self):
|
||||
def test_archive(self):
|
||||
self.file_test("archive.html")
|
||||
|
||||
@need_network
|
||||
|
|
|
|||
|
|
@ -35,10 +35,10 @@ NNTP_TIMEOUT_SECS = 30
|
|||
class TestNews(LinkCheckTest):
|
||||
"""Test nntp: and news: link checking."""
|
||||
|
||||
def newstest (self, url, resultlines):
|
||||
def newstest(self, url, resultlines):
|
||||
self.direct(url, resultlines)
|
||||
|
||||
def test_news_without_host (self):
|
||||
def test_news_without_host(self):
|
||||
# news testing
|
||||
url = "news:comp.os.linux.misc"
|
||||
resultlines = [
|
||||
|
|
@ -60,7 +60,7 @@ class TestNews(LinkCheckTest):
|
|||
]
|
||||
self.newstest(url, resultlines)
|
||||
|
||||
def test_snews_with_group (self):
|
||||
def test_snews_with_group(self):
|
||||
url = "snews:de.comp.os.unix.linux.misc"
|
||||
nurl = self.norm(url)
|
||||
resultlines = [
|
||||
|
|
@ -72,7 +72,7 @@ class TestNews(LinkCheckTest):
|
|||
]
|
||||
self.newstest(url, resultlines)
|
||||
|
||||
def test_illegal_syntax (self):
|
||||
def test_illegal_syntax(self):
|
||||
# illegal syntax
|
||||
url = "news:§$%&/´`(§%"
|
||||
qurl = self.norm(url)
|
||||
|
|
@ -87,7 +87,7 @@ class TestNews(LinkCheckTest):
|
|||
|
||||
@need_newsserver(NNTP_SERVER)
|
||||
@limit_time(NNTP_TIMEOUT_SECS, skip=True)
|
||||
def test_nntp_with_host (self):
|
||||
def test_nntp_with_host(self):
|
||||
url = "nntp://%s/comp.lang.python" % NNTP_SERVER
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
|
|
@ -101,7 +101,7 @@ class TestNews(LinkCheckTest):
|
|||
|
||||
@need_newsserver(NNTP_SERVER)
|
||||
@limit_time(NNTP_TIMEOUT_SECS, skip=True)
|
||||
def test_article_span (self):
|
||||
def test_article_span(self):
|
||||
url = "nntp://%s/comp.lang.python/1-5" % NNTP_SERVER
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
|
|
@ -113,7 +113,7 @@ class TestNews(LinkCheckTest):
|
|||
]
|
||||
self.newstest(url, resultlines)
|
||||
|
||||
def test_article_span_no_host (self):
|
||||
def test_article_span_no_host(self):
|
||||
url = "news:comp.lang.python/1-5"
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
|
|
@ -126,7 +126,7 @@ class TestNews(LinkCheckTest):
|
|||
|
||||
@need_newsserver(NNTP_SERVER)
|
||||
@limit_time(NNTP_TIMEOUT_SECS, skip=True)
|
||||
def test_host_no_group (self):
|
||||
def test_host_no_group(self):
|
||||
url = "nntp://%s/" % NNTP_SERVER
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ from . import httpserver
|
|||
class TestProxy(httpserver.HttpServerTest):
|
||||
"""Test no_proxy env var handling."""
|
||||
|
||||
def test_noproxy (self):
|
||||
def test_noproxy(self):
|
||||
# set env vars
|
||||
with EnvironmentVarGuard() as env:
|
||||
env.set("http_proxy", "http://example.org:8877")
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ from .telnetserver import TelnetServerTest
|
|||
class TestTelnet(TelnetServerTest):
|
||||
"""Test telnet: link checking."""
|
||||
|
||||
def test_telnet_error (self):
|
||||
def test_telnet_error(self):
|
||||
url = "telnet:"
|
||||
nurl = self.norm(url)
|
||||
resultlines = [
|
||||
|
|
@ -33,7 +33,7 @@ class TestTelnet(TelnetServerTest):
|
|||
]
|
||||
self.direct(url, resultlines)
|
||||
|
||||
def test_telnet_localhost (self):
|
||||
def test_telnet_localhost(self):
|
||||
url = self.get_url()
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ from . import LinkCheckTest
|
|||
class TestUnknown(LinkCheckTest):
|
||||
"""Test unknown URL scheme checking."""
|
||||
|
||||
def test_skype (self):
|
||||
def test_skype(self):
|
||||
url = "skype:"
|
||||
nurl = self.norm(url)
|
||||
resultlines = [
|
||||
|
|
@ -34,7 +34,7 @@ class TestUnknown(LinkCheckTest):
|
|||
]
|
||||
self.direct(url, resultlines)
|
||||
|
||||
def test_irc (self):
|
||||
def test_irc(self):
|
||||
url = "irc://example.org"
|
||||
nurl = self.norm(url)
|
||||
resultlines = [
|
||||
|
|
@ -56,7 +56,7 @@ class TestUnknown(LinkCheckTest):
|
|||
]
|
||||
self.direct(url, resultlines)
|
||||
|
||||
def test_steam (self):
|
||||
def test_steam(self):
|
||||
url = "steam://connect/example.org"
|
||||
nurl = self.norm(url)
|
||||
resultlines = [
|
||||
|
|
@ -68,7 +68,7 @@ class TestUnknown(LinkCheckTest):
|
|||
]
|
||||
self.direct(url, resultlines)
|
||||
|
||||
def test_feed (self):
|
||||
def test_feed(self):
|
||||
url = "feed:https://example.com/entries.atom"
|
||||
nurl = "feed:https%3A/example.com/entries.atom"
|
||||
resultlines = [
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ class TestWhitespace(LinkCheckTest):
|
|||
"""
|
||||
|
||||
@need_network
|
||||
def test_leading_whitespace (self):
|
||||
def test_leading_whitespace(self):
|
||||
# Leading whitespace
|
||||
url = " http://www.example.org/"
|
||||
attrs = self.get_attrs(url=url)
|
||||
|
|
@ -52,7 +52,7 @@ class TestWhitespace(LinkCheckTest):
|
|||
self.direct(url, resultlines)
|
||||
|
||||
@need_network
|
||||
def test_trailing_whitespace (self):
|
||||
def test_trailing_whitespace(self):
|
||||
# Trailing whitespace
|
||||
url = "http://www.example.org/ "
|
||||
resultlines = [
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ def get_file(filename=None):
|
|||
class TestConfig(unittest.TestCase):
|
||||
"""Test configuration parsing."""
|
||||
|
||||
def test_confparse (self):
|
||||
def test_confparse(self):
|
||||
config = linkcheck.configuration.Configuration()
|
||||
files = [get_file("config0.ini")]
|
||||
config.read(files)
|
||||
|
|
@ -147,12 +147,12 @@ class TestConfig(unittest.TestCase):
|
|||
self.assertEqual(config["gxml"]["parts"], ["realurl"])
|
||||
self.assertEqual(config["gxml"]["encoding"], "utf-8")
|
||||
|
||||
def test_confparse_error1 (self):
|
||||
def test_confparse_error1(self):
|
||||
config = linkcheck.configuration.Configuration()
|
||||
files = [get_file("config1.ini")]
|
||||
self.assertRaises(linkcheck.LinkCheckerError, config.read, files)
|
||||
|
||||
def test_confparse_error2 (self):
|
||||
def test_confparse_error2(self):
|
||||
config = linkcheck.configuration.Configuration()
|
||||
files = [get_file("config2.ini")]
|
||||
self.assertRaises(linkcheck.LinkCheckerError, config.read, files)
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ from linkcheck.logger.csvlog import CSVLogger
|
|||
|
||||
class TestCsvLogger(unittest.TestCase):
|
||||
|
||||
def test_parts (self):
|
||||
def test_parts(self):
|
||||
args = dict(
|
||||
filename=os.path.join(os.path.dirname(__file__), "testlog.csv"),
|
||||
parts=["realurl"],
|
||||
|
|
|
|||
|
|
@ -26,35 +26,35 @@ from linkcheck.strformat import limit
|
|||
class TestWsgi(unittest.TestCase):
|
||||
"""Test wsgi application."""
|
||||
|
||||
def test_form_valid_url (self):
|
||||
def test_form_valid_url(self):
|
||||
# Check url validity.
|
||||
env = dict()
|
||||
form = dict(url="http://www.example.com/", level="1")
|
||||
checkform(form, env)
|
||||
|
||||
def test_form_empty_url (self):
|
||||
def test_form_empty_url(self):
|
||||
# Check with empty url.
|
||||
env = dict()
|
||||
form = dict(url="", level="0")
|
||||
self.assertRaises(LCFormError, checkform, form, env)
|
||||
|
||||
def test_form_default_url (self):
|
||||
def test_form_default_url(self):
|
||||
# Check with default url.
|
||||
env = dict()
|
||||
form = dict(url="http://", level="0")
|
||||
self.assertRaises(LCFormError, checkform, form, env)
|
||||
|
||||
def test_form_invalid_url (self):
|
||||
def test_form_invalid_url(self):
|
||||
# Check url (in)validity.
|
||||
env = dict()
|
||||
form = dict(url="http://www.foo bar/", level="0")
|
||||
self.assertRaises(LCFormError, checkform, form, env)
|
||||
|
||||
def test_checklink (self):
|
||||
def test_checklink(self):
|
||||
form = dict(url="http://www.example.com/", level="0")
|
||||
checklink(form)
|
||||
|
||||
def test_application (self):
|
||||
def test_application(self):
|
||||
form = dict(url="http://www.example.com/", level="0")
|
||||
formdata = urllib.parse.urlencode(form)
|
||||
formdata = formdata.encode('ascii')
|
||||
|
|
|
|||
|
|
@ -27,14 +27,14 @@ class TestClamav(unittest.TestCase):
|
|||
self.clamav_conf = clamav.get_clamav_conf("/etc/clamav/clamd.conf")
|
||||
|
||||
@need_clamav
|
||||
def testClean (self):
|
||||
def testClean(self):
|
||||
data = b""
|
||||
infected, errors = clamav.scan(data, self.clamav_conf)
|
||||
self.assertFalse(infected)
|
||||
self.assertFalse(errors)
|
||||
|
||||
@need_clamav
|
||||
def testInfected (self):
|
||||
def testInfected(self):
|
||||
# from the clamav test direcotry: the clamav test file as html data
|
||||
data = (
|
||||
b'<a href="data:application/octet-stream;base64,'
|
||||
|
|
|
|||
|
|
@ -24,5 +24,5 @@ import linkcheck.director.console
|
|||
class TestConsole(unittest.TestCase):
|
||||
"""Test console operations."""
|
||||
|
||||
def test_internal_error (self):
|
||||
def test_internal_error(self):
|
||||
linkcheck.director.console.internal_error()
|
||||
|
|
|
|||
|
|
@ -25,12 +25,12 @@ import linkcheck.containers
|
|||
class TestLFUCache(unittest.TestCase):
|
||||
"""Test LFU cache implementation."""
|
||||
|
||||
def setUp (self):
|
||||
def setUp(self):
|
||||
"""Set up self.d as empty LFU cache with default size of 1000."""
|
||||
self.size = 1000
|
||||
self.d = linkcheck.containers.LFUCache(self.size)
|
||||
|
||||
def test_num_uses (self):
|
||||
def test_num_uses(self):
|
||||
self.assertTrue(not self.d)
|
||||
self.d["a"] = 1
|
||||
self.assertTrue("a" in self.d)
|
||||
|
|
@ -38,21 +38,21 @@ class TestLFUCache(unittest.TestCase):
|
|||
dummy = self.d["a"]
|
||||
self.assertEqual(self.d.uses("a"), 1)
|
||||
|
||||
def test_values (self):
|
||||
def test_values(self):
|
||||
self.assertTrue(not self.d)
|
||||
self.d["a"] = 1
|
||||
self.d["b"] = 2
|
||||
self.assertEqual(set([1, 2]), set(self.d.values()))
|
||||
self.assertEqual(set([1, 2]), set(self.d.itervalues()))
|
||||
|
||||
def test_popitem (self):
|
||||
def test_popitem(self):
|
||||
self.assertTrue(not self.d)
|
||||
self.d["a"] = 42
|
||||
self.assertEqual(self.d.popitem(), ("a", 42))
|
||||
self.assertTrue(not self.d)
|
||||
self.assertRaises(KeyError, self.d.popitem)
|
||||
|
||||
def test_shrink (self):
|
||||
def test_shrink(self):
|
||||
self.assertTrue(not self.d)
|
||||
for i in range(self.size):
|
||||
self.d[i] = i
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ import linkcheck.director
|
|||
class TestCookies(unittest.TestCase):
|
||||
"""Test cookie routines."""
|
||||
|
||||
def test_cookie_parse_multiple_headers (self):
|
||||
def test_cookie_parse_multiple_headers(self):
|
||||
lines = [
|
||||
'Host: example.org',
|
||||
'Path: /hello',
|
||||
|
|
@ -46,7 +46,7 @@ class TestCookies(unittest.TestCase):
|
|||
self.assertEqual(cookies[1].name, 'spam')
|
||||
self.assertEqual(cookies[1].value, 'egg')
|
||||
|
||||
def test_cookie_parse_multiple_values (self):
|
||||
def test_cookie_parse_multiple_values(self):
|
||||
lines = [
|
||||
'Host: example.org',
|
||||
'Set-cookie: baggage="elitist"; comment="hologram"',
|
||||
|
|
@ -62,7 +62,7 @@ class TestCookies(unittest.TestCase):
|
|||
self.assertEqual(cookies[1].name, 'comment')
|
||||
self.assertEqual(cookies[1].value, 'hologram')
|
||||
|
||||
def test_cookie_parse_error (self):
|
||||
def test_cookie_parse_error(self):
|
||||
lines = [
|
||||
' Host: imaweevil.org',
|
||||
'Set-cookie: baggage="elitist"; comment="hologram"',
|
||||
|
|
@ -70,7 +70,7 @@ class TestCookies(unittest.TestCase):
|
|||
from_headers = linkcheck.cookies.from_headers
|
||||
self.assertRaises(ValueError, from_headers, "\r\n".join(lines))
|
||||
|
||||
def test_cookie_file (self):
|
||||
def test_cookie_file(self):
|
||||
# Regression test for https://github.com/linkchecker/linkchecker/issues/62
|
||||
config = linkcheck.configuration.Configuration()
|
||||
here = os.path.dirname(__file__)
|
||||
|
|
|
|||
|
|
@ -29,16 +29,16 @@ class TestDecorators(unittest.TestCase):
|
|||
Test decorators.
|
||||
"""
|
||||
|
||||
def test_timeit (self):
|
||||
def test_timeit(self):
|
||||
@linkcheck.decorators.timed()
|
||||
def f ():
|
||||
def f():
|
||||
return 42
|
||||
self.assertEqual(f(), 42)
|
||||
|
||||
def test_timeit2 (self):
|
||||
def test_timeit2(self):
|
||||
log = StringIO()
|
||||
@linkcheck.decorators.timed(log=log, limit=0)
|
||||
def f ():
|
||||
def f():
|
||||
time.sleep(1)
|
||||
return 42
|
||||
self.assertEqual(f(), 42)
|
||||
|
|
|
|||
|
|
@ -27,14 +27,14 @@ class TestDummy(unittest.TestCase):
|
|||
Test dummy object.
|
||||
"""
|
||||
|
||||
def test_creation (self):
|
||||
def test_creation(self):
|
||||
dummy = linkcheck.dummy.Dummy()
|
||||
dummy = linkcheck.dummy.Dummy("1")
|
||||
dummy = linkcheck.dummy.Dummy("1", "2")
|
||||
dummy = linkcheck.dummy.Dummy(a=1, b=2)
|
||||
dummy = linkcheck.dummy.Dummy("1", a=None, b=2)
|
||||
|
||||
def test_attributes (self):
|
||||
def test_attributes(self):
|
||||
dummy = linkcheck.dummy.Dummy()
|
||||
dummy.hulla
|
||||
dummy.hulla.bulla
|
||||
|
|
@ -42,14 +42,14 @@ class TestDummy(unittest.TestCase):
|
|||
del dummy.wulla
|
||||
del dummy.wulla.mulla
|
||||
|
||||
def test_methods (self):
|
||||
def test_methods(self):
|
||||
dummy = linkcheck.dummy.Dummy()
|
||||
dummy.hulla()
|
||||
dummy.hulla().bulla()
|
||||
if "a" in dummy:
|
||||
pass
|
||||
|
||||
def test_indexes (self):
|
||||
def test_indexes(self):
|
||||
dummy = linkcheck.dummy.Dummy()
|
||||
len(dummy)
|
||||
dummy[1] = dummy[2]
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ class TestFilenames(unittest.TestCase):
|
|||
"""
|
||||
|
||||
@need_windows
|
||||
def test_nt_filename (self):
|
||||
def test_nt_filename(self):
|
||||
path = os.getcwd()
|
||||
realpath = get_nt_filename(path)
|
||||
self.assertEqual(path, realpath)
|
||||
|
|
|
|||
|
|
@ -27,10 +27,10 @@ file_non_existing = "ZZZ.i_dont_exist"
|
|||
class TestFileutil(unittest.TestCase):
|
||||
"""Test file utility functions."""
|
||||
|
||||
def test_size (self):
|
||||
def test_size(self):
|
||||
self.assertTrue(linkcheck.fileutil.get_size(file_existing) > 0)
|
||||
self.assertEqual(linkcheck.fileutil.get_size(file_non_existing), -1)
|
||||
|
||||
def test_mtime (self):
|
||||
def test_mtime(self):
|
||||
self.assertTrue(linkcheck.fileutil.get_mtime(file_existing) > 0)
|
||||
self.assertEqual(linkcheck.fileutil.get_mtime(file_non_existing), 0)
|
||||
|
|
|
|||
|
|
@ -83,7 +83,7 @@ class TestFtpparse(unittest.TestCase):
|
|||
Test FTP LIST line parsing.
|
||||
"""
|
||||
|
||||
def test_ftpparse (self):
|
||||
def test_ftpparse(self):
|
||||
for line, expected in patterns:
|
||||
res = ftpparse(line)
|
||||
self.assertEqual(expected, res,
|
||||
|
|
|
|||
|
|
@ -26,26 +26,26 @@ class TestLinkparser(unittest.TestCase):
|
|||
Test link parsing.
|
||||
"""
|
||||
|
||||
def _test_one_link (self, content, url):
|
||||
def _test_one_link(self, content, url):
|
||||
self.count_url = 0
|
||||
linkparse.find_links(htmlsoup.make_soup(content),
|
||||
self._test_one_url(url), linkparse.LinkTags)
|
||||
self.assertEqual(self.count_url, 1)
|
||||
|
||||
def _test_one_url (self, origurl):
|
||||
def _test_one_url(self, origurl):
|
||||
"""Return parser callback function."""
|
||||
def callback (url, line, column, name, base):
|
||||
def callback(url, line, column, name, base):
|
||||
self.count_url += 1
|
||||
self.assertEqual(origurl, url)
|
||||
return callback
|
||||
|
||||
def _test_no_link (self, content):
|
||||
def callback (url, line, column, name, base):
|
||||
def _test_no_link(self, content):
|
||||
def callback(url, line, column, name, base):
|
||||
self.assertTrue(False, 'URL %r found' % url)
|
||||
linkparse.find_links(htmlsoup.make_soup(content), callback,
|
||||
linkparse.LinkTags)
|
||||
|
||||
def test_href_parsing (self):
|
||||
def test_href_parsing(self):
|
||||
# Test <a href> parsing.
|
||||
content = '<a href="%s">'
|
||||
url = "alink"
|
||||
|
|
@ -76,7 +76,7 @@ class TestLinkparser(unittest.TestCase):
|
|||
url = "alink"
|
||||
self._test_no_link(content % url)
|
||||
|
||||
def test_css_parsing (self):
|
||||
def test_css_parsing(self):
|
||||
# Test css style attribute parsing.
|
||||
content = '<table style="background: url(%s) no-repeat" >'
|
||||
url = "alink"
|
||||
|
|
@ -96,7 +96,7 @@ class TestLinkparser(unittest.TestCase):
|
|||
content = "<table style='background: url( \"%s\") no-repeat' >"
|
||||
self._test_one_link(content % url, url)
|
||||
|
||||
def test_comment_stripping (self):
|
||||
def test_comment_stripping(self):
|
||||
strip = linkparse.strip_c_comments
|
||||
content = "/* url('http://example.org')*/"
|
||||
self.assertEqual(strip(content), "")
|
||||
|
|
|
|||
|
|
@ -25,13 +25,13 @@ import linkcheck.mimeutil
|
|||
class TestMiMeutil(unittest.TestCase):
|
||||
"""Test file utility functions."""
|
||||
|
||||
def mime_test (self, filename, mime_expected):
|
||||
def mime_test(self, filename, mime_expected):
|
||||
absfilename = get_file(filename)
|
||||
with open(absfilename) as fd:
|
||||
mime = linkcheck.mimeutil.guess_mimetype(absfilename, read=fd.read)
|
||||
self.assertEqual(mime, mime_expected)
|
||||
|
||||
def test_mime (self):
|
||||
def test_mime(self):
|
||||
filename = os.path.join("plist_binary", "Bookmarks.plist")
|
||||
self.mime_test(filename, "application/x-plist+safari")
|
||||
filename = os.path.join("plist_xml", "Bookmarks.plist")
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ class TestNetwork(unittest.TestCase):
|
|||
|
||||
@need_network
|
||||
@need_linux
|
||||
def test_iputils (self):
|
||||
def test_iputils(self):
|
||||
# note: need a hostname whose reverse lookup of the IP is the same host
|
||||
host = "dinsdale.python.org"
|
||||
ips = iputil.resolve_host(host)
|
||||
|
|
|
|||
|
|
@ -138,13 +138,13 @@ class TestParser(unittest.TestCase):
|
|||
"""
|
||||
|
||||
@parameterized.expand(parsetests)
|
||||
def test_parse (self, _in, _out):
|
||||
def test_parse(self, _in, _out):
|
||||
# Parse all test patterns in one go.
|
||||
out = StringIO()
|
||||
pretty_print_html(out, htmlsoup.make_soup(_in))
|
||||
self.check_results(_in, _out, out)
|
||||
|
||||
def check_results (self, _in, _out, out):
|
||||
def check_results(self, _in, _out, out):
|
||||
"""
|
||||
Check parse results.
|
||||
"""
|
||||
|
|
@ -153,30 +153,30 @@ class TestParser(unittest.TestCase):
|
|||
(_in, res, _out)
|
||||
self.assertEqual(res, _out, msg=msg)
|
||||
|
||||
def test_encoding_detection_utf_content (self):
|
||||
def test_encoding_detection_utf_content(self):
|
||||
html = b'<meta http-equiv="content-type" content="text/html; charset=UTF-8">'
|
||||
self.encoding_test(html, "utf-8")
|
||||
|
||||
def test_encoding_detection_utf_charset (self):
|
||||
def test_encoding_detection_utf_charset(self):
|
||||
html = b'<meta charset="UTF-8">'
|
||||
self.encoding_test(html, "utf-8")
|
||||
|
||||
def test_encoding_detection_iso_content (self):
|
||||
def test_encoding_detection_iso_content(self):
|
||||
html = b'<meta http-equiv="content-type" content="text/html; charset=ISO8859-1">'
|
||||
self.encoding_test(html, "iso8859-1")
|
||||
|
||||
def test_encoding_detection_iso_charset (self):
|
||||
def test_encoding_detection_iso_charset(self):
|
||||
html = b'<meta charset="ISO8859-1">'
|
||||
self.encoding_test(html, "iso8859-1")
|
||||
|
||||
def test_encoding_detection_iso_bad_charset (self):
|
||||
def test_encoding_detection_iso_bad_charset(self):
|
||||
html = b'<meta charset="hulla">'
|
||||
self.encoding_test(html, "ascii")
|
||||
|
||||
def test_encoding_detection_iso_bad_content (self):
|
||||
def test_encoding_detection_iso_bad_content(self):
|
||||
html = b'<meta http-equiv="content-type" content="text/html; charset=blabla">'
|
||||
self.encoding_test(html, "ascii")
|
||||
|
||||
def encoding_test (self, html, expected):
|
||||
def encoding_test(self, html, expected):
|
||||
soup = htmlsoup.make_soup(html)
|
||||
self.assertEqual(soup.original_encoding, expected)
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ class TestPo(unittest.TestCase):
|
|||
|
||||
@need_posix
|
||||
@need_msgfmt
|
||||
def test_pos (self):
|
||||
def test_pos(self):
|
||||
"""Test .po files syntax."""
|
||||
for f in get_pofiles():
|
||||
ret = os.system("msgfmt -c -o - %s > /dev/null" % f)
|
||||
|
|
@ -53,13 +53,13 @@ class TestGTranslator(unittest.TestCase):
|
|||
"""GTranslator displays a middot · for a space. Unfortunately, it
|
||||
gets copied with copy-and-paste, what a shame."""
|
||||
|
||||
def test_gtranslator (self):
|
||||
def test_gtranslator(self):
|
||||
"""Test all pofiles for GTranslator brokenness."""
|
||||
for f in get_pofiles():
|
||||
with open(f, 'rb') as fd:
|
||||
self.check_file(fd, f)
|
||||
|
||||
def check_file (self, fd, f):
|
||||
def check_file(self, fd, f):
|
||||
"""Test for GTranslator broken syntax."""
|
||||
for line in fd:
|
||||
if line.strip().startswith(b"#"):
|
||||
|
|
|
|||
|
|
@ -27,11 +27,11 @@ class TestRobotParser(unittest.TestCase):
|
|||
Test robots.txt parser (needs internet access).
|
||||
"""
|
||||
|
||||
def setUp (self):
|
||||
def setUp(self):
|
||||
"""Initialize self.rp as a robots.txt parser."""
|
||||
self.rp = robotparser2.RobotFileParser()
|
||||
|
||||
def check (self, a, b):
|
||||
def check(self, a, b):
|
||||
"""Helper function comparing two results a and b."""
|
||||
if not b:
|
||||
ac = "access denied"
|
||||
|
|
@ -41,7 +41,7 @@ class TestRobotParser(unittest.TestCase):
|
|||
self.fail("%s != %s (%s)" % (a, b, ac))
|
||||
|
||||
@need_network
|
||||
def test_nonexisting_robots (self):
|
||||
def test_nonexisting_robots(self):
|
||||
# robots.txt that does not exist
|
||||
self.rp.set_url('http://www.lycos.com/robots.txt')
|
||||
self.rp.read()
|
||||
|
|
@ -49,7 +49,7 @@ class TestRobotParser(unittest.TestCase):
|
|||
'http://www.lycos.com/search'), True)
|
||||
|
||||
@need_network
|
||||
def test_disallowed_robots (self):
|
||||
def test_disallowed_robots(self):
|
||||
self.rp.set_url('http://google.com/robots.txt')
|
||||
self.rp.read()
|
||||
self.check(self.rp.can_fetch(configuration.UserAgent,
|
||||
|
|
|
|||
|
|
@ -27,13 +27,13 @@ class TestRobotsTxt(unittest.TestCase):
|
|||
Test string formatting routines.
|
||||
"""
|
||||
|
||||
def setUp (self):
|
||||
def setUp(self):
|
||||
"""
|
||||
Initialize self.rp as a robots.txt parser.
|
||||
"""
|
||||
self.rp = linkcheck.robotparser2.RobotFileParser()
|
||||
|
||||
def test_robotstxt (self):
|
||||
def test_robotstxt(self):
|
||||
lines = [
|
||||
"User-agent: *",
|
||||
]
|
||||
|
|
@ -41,7 +41,7 @@ class TestRobotsTxt(unittest.TestCase):
|
|||
self.assertTrue(self.rp.mtime() > 0)
|
||||
self.assertEqual(str(self.rp), "\n".join(lines))
|
||||
|
||||
def test_robotstxt2 (self):
|
||||
def test_robotstxt2(self):
|
||||
lines = [
|
||||
"User-agent: *",
|
||||
"Disallow: /search",
|
||||
|
|
@ -49,7 +49,7 @@ class TestRobotsTxt(unittest.TestCase):
|
|||
self.rp.parse(lines)
|
||||
self.assertEqual(str(self.rp), "\n".join(lines))
|
||||
|
||||
def test_robotstxt3 (self):
|
||||
def test_robotstxt3(self):
|
||||
lines = [
|
||||
"Disallow: /search",
|
||||
"",
|
||||
|
|
@ -64,7 +64,7 @@ class TestRobotsTxt(unittest.TestCase):
|
|||
self.rp.parse(lines)
|
||||
self.assertEqual(str(self.rp), "")
|
||||
|
||||
def test_robotstxt4 (self):
|
||||
def test_robotstxt4(self):
|
||||
lines = [
|
||||
"User-agent: Bla",
|
||||
"Disallow: /cgi-bin",
|
||||
|
|
@ -75,7 +75,7 @@ class TestRobotsTxt(unittest.TestCase):
|
|||
lines.insert(2, "")
|
||||
self.assertEqual(str(self.rp), "\n".join(lines))
|
||||
|
||||
def test_robotstxt5 (self):
|
||||
def test_robotstxt5(self):
|
||||
lines = [
|
||||
"#one line comment",
|
||||
"User-agent: Bla",
|
||||
|
|
@ -90,7 +90,7 @@ class TestRobotsTxt(unittest.TestCase):
|
|||
self.rp.parse(lines)
|
||||
self.assertEqual(str(self.rp), "\n".join(lines2))
|
||||
|
||||
def test_robotstxt6 (self):
|
||||
def test_robotstxt6(self):
|
||||
lines = [
|
||||
"User-agent: Bla",
|
||||
"",
|
||||
|
|
@ -98,7 +98,7 @@ class TestRobotsTxt(unittest.TestCase):
|
|||
self.rp.parse(lines)
|
||||
self.assertEqual(str(self.rp), "")
|
||||
|
||||
def test_robotstxt7 (self):
|
||||
def test_robotstxt7(self):
|
||||
lines = [
|
||||
"User-agent: Bla",
|
||||
"Allow: /",
|
||||
|
|
@ -110,7 +110,7 @@ class TestRobotsTxt(unittest.TestCase):
|
|||
self.assertEqual(str(self.rp), "\n".join(lines))
|
||||
self.assertTrue(self.rp.can_fetch("Bla", "/"))
|
||||
|
||||
def test_crawldelay (self):
|
||||
def test_crawldelay(self):
|
||||
lines = [
|
||||
"User-agent: Blubb",
|
||||
"Crawl-delay: 10",
|
||||
|
|
@ -127,7 +127,7 @@ class TestRobotsTxt(unittest.TestCase):
|
|||
self.assertEqual(self.rp.get_crawldelay("Hulla"), 5)
|
||||
self.assertEqual(self.rp.get_crawldelay("Bulla"), 1)
|
||||
|
||||
def test_crawldelay2 (self):
|
||||
def test_crawldelay2(self):
|
||||
lines = [
|
||||
"User-agent: Blubb",
|
||||
"Crawl-delay: X",
|
||||
|
|
@ -136,13 +136,13 @@ class TestRobotsTxt(unittest.TestCase):
|
|||
del lines[1]
|
||||
self.assertEqual(str(self.rp), "\n".join(lines))
|
||||
|
||||
def check_urls (self, good, bad, agent="test_robotparser"):
|
||||
def check_urls(self, good, bad, agent="test_robotparser"):
|
||||
for url in good:
|
||||
self.check_url(agent, url, True)
|
||||
for url in bad:
|
||||
self.check_url(agent, url, False)
|
||||
|
||||
def check_url (self, agent, url, can_fetch):
|
||||
def check_url(self, agent, url, can_fetch):
|
||||
if isinstance(url, tuple):
|
||||
agent, url = url
|
||||
res = self.rp.can_fetch(agent, url)
|
||||
|
|
@ -151,7 +151,7 @@ class TestRobotsTxt(unittest.TestCase):
|
|||
else:
|
||||
self.assertFalse(res, "%s allowed" % url)
|
||||
|
||||
def test_access1 (self):
|
||||
def test_access1(self):
|
||||
lines = [
|
||||
"User-agent: *",
|
||||
"Disallow: /cyberworld/map/ # This is an infinite virtual URL space",
|
||||
|
|
@ -170,7 +170,7 @@ class TestRobotsTxt(unittest.TestCase):
|
|||
bad = ['/cyberworld/map/index.html', '/tmp/xxx', '/foo.html']
|
||||
self.check_urls(good, bad)
|
||||
|
||||
def test_access2 (self):
|
||||
def test_access2(self):
|
||||
lines = [
|
||||
"# robots.txt for http://www.example.com/",
|
||||
"",
|
||||
|
|
@ -195,7 +195,7 @@ class TestRobotsTxt(unittest.TestCase):
|
|||
bad = ['/cyberworld/map/index.html']
|
||||
self.check_urls(good, bad)
|
||||
|
||||
def test_access3 (self):
|
||||
def test_access3(self):
|
||||
lines = [
|
||||
"# go away",
|
||||
"User-agent: *",
|
||||
|
|
@ -211,7 +211,7 @@ class TestRobotsTxt(unittest.TestCase):
|
|||
bad = ['/cyberworld/map/index.html', '/', '/tmp/']
|
||||
self.check_urls(good, bad)
|
||||
|
||||
def test_access4 (self):
|
||||
def test_access4(self):
|
||||
lines = [
|
||||
"User-agent: figtree",
|
||||
"Disallow: /tmp",
|
||||
|
|
@ -237,7 +237,7 @@ class TestRobotsTxt(unittest.TestCase):
|
|||
self.check_urls(good, bad, 'figtree')
|
||||
self.check_urls(good, bad, 'FigTree/1.0 Robot libwww-perl/5.04')
|
||||
|
||||
def test_access5 (self):
|
||||
def test_access5(self):
|
||||
lines = [
|
||||
"User-agent: *",
|
||||
"Disallow: /tmp/",
|
||||
|
|
@ -261,7 +261,7 @@ class TestRobotsTxt(unittest.TestCase):
|
|||
'/%7Ejoe/index.html']
|
||||
self.check_urls(good, bad)
|
||||
|
||||
def test_access6 (self):
|
||||
def test_access6(self):
|
||||
lines = [
|
||||
"User-Agent: *",
|
||||
"Disallow: /.",
|
||||
|
|
@ -271,7 +271,7 @@ class TestRobotsTxt(unittest.TestCase):
|
|||
bad = [] # Bug report says "/" should be denied, but that is not in the RFC
|
||||
self.check_urls(good, bad)
|
||||
|
||||
def test_access7 (self):
|
||||
def test_access7(self):
|
||||
lines = [
|
||||
"User-agent: Example",
|
||||
"Disallow: /example",
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ class TestStrFormat(unittest.TestCase):
|
|||
Test string formatting routines.
|
||||
"""
|
||||
|
||||
def test_unquote (self):
|
||||
def test_unquote(self):
|
||||
# Test quote stripping.
|
||||
u = linkcheck.strformat.unquote
|
||||
self.assertEqual(u(""), "")
|
||||
|
|
@ -52,7 +52,7 @@ class TestStrFormat(unittest.TestCase):
|
|||
self.assertEqual(u("'a\"", matching=True), "'a\"")
|
||||
self.assertEqual(u("\"a'", matching=True), "\"a'")
|
||||
|
||||
def test_wrap (self):
|
||||
def test_wrap(self):
|
||||
# Test line wrapping.
|
||||
wrap = linkcheck.strformat.wrap
|
||||
s = "11%(sep)s22%(sep)s33%(sep)s44%(sep)s55" % {'sep': os.linesep}
|
||||
|
|
@ -73,14 +73,14 @@ class TestStrFormat(unittest.TestCase):
|
|||
self.assertEqual(wrap(None, 10), None)
|
||||
self.assertFalse(linkcheck.strformat.get_paragraphs(None))
|
||||
|
||||
def test_remove_markup (self):
|
||||
def test_remove_markup(self):
|
||||
# Test markup removing.
|
||||
self.assertEqual(linkcheck.strformat.remove_markup("<a>"), "")
|
||||
self.assertEqual(linkcheck.strformat.remove_markup("<>"), "")
|
||||
self.assertEqual(linkcheck.strformat.remove_markup("<<>"), "")
|
||||
self.assertEqual(linkcheck.strformat.remove_markup("a < b"), "a < b")
|
||||
|
||||
def test_strsize (self):
|
||||
def test_strsize(self):
|
||||
# Test byte size strings.
|
||||
self.assertRaises(ValueError, linkcheck.strformat.strsize, -1)
|
||||
self.assertEqual(linkcheck.strformat.strsize(0), "0B")
|
||||
|
|
@ -96,35 +96,35 @@ class TestStrFormat(unittest.TestCase):
|
|||
self.assertEqual(linkcheck.strformat.strsize(1024*1024*1024*14),
|
||||
"14.0GB")
|
||||
|
||||
def test_is_ascii (self):
|
||||
def test_is_ascii(self):
|
||||
self.assertTrue(linkcheck.strformat.is_ascii("abcd./"))
|
||||
self.assertTrue(not linkcheck.strformat.is_ascii("ä"))
|
||||
self.assertTrue(not linkcheck.strformat.is_ascii("ä"))
|
||||
|
||||
def test_indent (self):
|
||||
def test_indent(self):
|
||||
s = "bla"
|
||||
self.assertEqual(linkcheck.strformat.indent(s, ""), s)
|
||||
self.assertEqual(linkcheck.strformat.indent(s, " "), " "+s)
|
||||
|
||||
def test_stripurl (self):
|
||||
def test_stripurl(self):
|
||||
self.assertEqual(linkcheck.strformat.stripurl("a\tb"), "a\tb")
|
||||
self.assertEqual(linkcheck.strformat.stripurl(" a\t b"), "a\t b")
|
||||
self.assertEqual(linkcheck.strformat.stripurl(" ab\t\ra\nb"), "ab")
|
||||
self.assertEqual(linkcheck.strformat.stripurl(None), None)
|
||||
self.assertEqual(linkcheck.strformat.stripurl(""), "")
|
||||
|
||||
def test_limit (self):
|
||||
def test_limit(self):
|
||||
self.assertEqual(linkcheck.strformat.limit("", 0), "")
|
||||
self.assertEqual(linkcheck.strformat.limit("a", 0), "")
|
||||
self.assertEqual(linkcheck.strformat.limit("1", 1), "1")
|
||||
self.assertEqual(linkcheck.strformat.limit("11", 1), "1...")
|
||||
|
||||
def test_strtime (self):
|
||||
def test_strtime(self):
|
||||
zone = linkcheck.strformat.strtimezone()
|
||||
t = linkcheck.strformat.strtime(0, func=time.gmtime)
|
||||
self.assertEqual(t, "1970-01-01 00:00:00"+zone)
|
||||
|
||||
def test_duration (self):
|
||||
def test_duration(self):
|
||||
duration = linkcheck.strformat.strduration
|
||||
self.assertEqual(duration(-0.5), "-00:01")
|
||||
self.assertEqual(duration(0), "00:00")
|
||||
|
|
@ -136,7 +136,7 @@ class TestStrFormat(unittest.TestCase):
|
|||
self.assertEqual(duration(60*60), "01:00:00")
|
||||
self.assertEqual(duration(60*60*24), "24:00:00")
|
||||
|
||||
def test_duration_long (self):
|
||||
def test_duration_long(self):
|
||||
duration = lambda s: linkcheck.strformat.strduration_long(s, do_translate=False)
|
||||
self.assertEqual(duration(-0.5), "-0.50 seconds")
|
||||
self.assertEqual(duration(0), "0.00 seconds")
|
||||
|
|
@ -151,23 +151,23 @@ class TestStrFormat(unittest.TestCase):
|
|||
self.assertEqual(duration(60*60*24*365 + 60*60*24 + 2),
|
||||
"1 year, 1 day")
|
||||
|
||||
def test_linenumber (self):
|
||||
def test_linenumber(self):
|
||||
get_line_number = linkcheck.strformat.get_line_number
|
||||
self.assertEqual(get_line_number("a", -5), 0)
|
||||
self.assertEqual(get_line_number("a", 0), 1)
|
||||
self.assertEqual(get_line_number("a\nb", 2), 2)
|
||||
|
||||
def test_encoding (self):
|
||||
def test_encoding(self):
|
||||
is_encoding = linkcheck.strformat.is_encoding
|
||||
self.assertTrue(is_encoding('ascii'))
|
||||
self.assertFalse(is_encoding('hulla'))
|
||||
|
||||
def test_unicode_safe (self):
|
||||
def test_unicode_safe(self):
|
||||
unicode_safe = linkcheck.strformat.unicode_safe
|
||||
self.assertEqual(unicode_safe("a"), "a")
|
||||
self.assertEqual(unicode_safe("a"), "a")
|
||||
|
||||
def test_ascii_safe (self):
|
||||
def test_ascii_safe(self):
|
||||
ascii_safe = linkcheck.strformat.ascii_safe
|
||||
self.assertEqual(ascii_safe("a"), "a")
|
||||
self.assertEqual(ascii_safe("ä"), "")
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue