Merge pull request #388 from cjmayo/tidyten9

Remove spaces after names
Chris Mayo 2020-05-17 17:34:04 +01:00 committed by GitHub
commit 1e35530f9d
135 changed files with 1017 additions and 1016 deletions
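
Every hunk below makes the same mechanical edit: the space between a def/class name and its opening parenthesis is removed (for example, def main (args): becomes def main(args):), matching the no-space-before-parenthesis style recommended by PEP 8. As a minimal sketch only, not part of this commit, such a rewrite could be expressed as:

import re

def strip_space_before_paren(line):
    """Turn 'def main (args):' into 'def main(args):'; same for 'class' lines."""
    # Hypothetical helper for illustration; the actual commit touched 135 files.
    return re.sub(r'^(\s*(?:def|class)\s+\w+)\s+\(', r'\1(', line)

Applied line by line, this reproduces the before/after pairs of def and class statements shown in the hunks that follow.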

View file

@ -23,7 +23,7 @@ import sys
from xml.etree.ElementTree import parse
def main (args):
def main(args):
filename = args[0]
with open(filename) as fd:
tree = parse(fd)

View file

@ -48,12 +48,12 @@ from .logconf import (
import _LinkChecker_configdata as configdata
def module_path ():
def module_path():
"""Return absolute directory of system executable."""
return os.path.dirname(os.path.abspath(sys.executable))
def get_install_data ():
def get_install_data():
"""Return absolute path of LinkChecker data installation directory."""
from .loader import is_frozen
if is_frozen():
@ -70,7 +70,7 @@ class LinkCheckerInterrupt(Exception):
pass
def get_link_pat (arg, strict=False):
def get_link_pat(arg, strict=False):
"""Get a link pattern matcher for intern/extern links.
Returns a compiled pattern and a negate and strict option.
@ -101,7 +101,7 @@ def get_link_pat (arg, strict=False):
}
def init_i18n (loc=None):
def init_i18n(loc=None):
"""Initialize i18n with the configured locale dir. The environment
variable LOCPATH can also specify a locale dir.
@ -127,7 +127,7 @@ def init_i18n (loc=None):
init_i18n()
def drop_privileges ():
def drop_privileges():
"""Make sure to drop root privileges on POSIX systems."""
if os.name != 'posix':
return

View file

@ -163,7 +163,7 @@ if os.name == 'nt':
Beep = "\007"
def esc_ansicolor (color):
def esc_ansicolor(color):
"""convert a named color definition to an escaped ANSI color"""
control = ''
if ";" in color:
@ -191,7 +191,7 @@ def get_win_color(color):
return foreground, background, style
def has_colors (fp):
def has_colors(fp):
"""Test if given file is an ANSI color enabled tty."""
# The is_tty() function ensures that we do not colorize
# redirected streams, as this is almost never what we want
@ -210,7 +210,7 @@ def has_colors (fp):
return False
def get_columns (fp):
def get_columns(fp):
"""Return number of columns for given file."""
if not is_tty(fp):
return 80
@ -226,7 +226,7 @@ def get_columns (fp):
return 80
def _write_color_colorama (fp, text, color):
def _write_color_colorama(fp, text, color):
"""Colorize text with given color."""
foreground, background, style = get_win_color(color)
colorama.set_console(foreground=foreground, background=background,
@ -235,7 +235,7 @@ def _write_color_colorama (fp, text, color):
colorama.reset_console()
def _write_color_ansi (fp, text, color):
def _write_color_ansi(fp, text, color):
"""Colorize text with given color."""
fp.write(esc_ansicolor(color))
fp.write(text)
@ -252,7 +252,7 @@ else:
class Colorizer:
"""Prints colored messages to streams."""
def __init__ (self, fp):
def __init__(self, fp):
"""Initialize with given stream (file-like object)."""
self.fp = fp
if has_colors(fp):
@ -260,26 +260,26 @@ class Colorizer:
else:
self.write = self._write
def _write (self, text, color=None):
def _write(self, text, color=None):
"""Print text as-is."""
self.fp.write(text)
def _write_color (self, text, color=None):
def _write_color(self, text, color=None):
"""Print text with given color. If color is None, print text as-is."""
if color is None:
self.fp.write(text)
else:
write_color(self.fp, text, color)
def __getattr__ (self, name):
def __getattr__(self, name):
"""Delegate attribute access to the stored stream object."""
return getattr(self.fp, name)
class ColoredStreamHandler (logging.StreamHandler):
class ColoredStreamHandler(logging.StreamHandler):
"""Send colored log messages to streams (file-like objects)."""
def __init__ (self, strm=None):
def __init__(self, strm=None):
"""Log to given stream (a file-like object) or to stderr if
strm is None.
"""
@ -293,12 +293,12 @@ class ColoredStreamHandler (logging.StreamHandler):
logging.DEBUG: 'white',
}
def get_color (self, record):
def get_color(self, record):
"""Get appropriate color according to log level.
"""
return self.colors.get(record.levelno, 'default')
def emit (self, record):
def emit(self, record):
"""Emit a record.
If a formatter is specified, it is used to format the record.

View file

@ -22,7 +22,7 @@ from xdg import xdg_config_home
nt_filename_encoding="mbcs"
def get_profile_dir ():
def get_profile_dir():
"""Return path where all profiles of current user are stored."""
if os.name == 'nt':
if "LOCALAPPDATA" in os.environ:
@ -46,7 +46,7 @@ def get_profile_dir ():
return dirpath
def find_bookmark_file (profile="Default"):
def find_bookmark_file(profile="Default"):
"""Return the bookmark file of the Default profile.
Returns absolute filename if found, or empty string if no bookmark file
could be found.

View file

@ -24,7 +24,7 @@ from xdg.BaseDirectory import xdg_config_home
nt_filename_encoding="mbcs"
def get_profile_dir ():
def get_profile_dir():
"""Return path where all profiles of current user are stored."""
if os.name == 'nt':
if "LOCALAPPDATA" in os.environ:
@ -48,7 +48,7 @@ def get_profile_dir ():
return dirpath
def find_bookmark_file (profile="Default"):
def find_bookmark_file(profile="Default"):
"""Return the bookmark file of the Default profile.
Returns absolute filename if found, or empty string if no bookmark file
could be found.
@ -64,7 +64,7 @@ def find_bookmark_file (profile="Default"):
return ""
def parse_bookmark_data (data):
def parse_bookmark_data(data):
"""Parse data string.
Return iterator for bookmarks of the form (url, name).
Bookmarks are not sorted.
@ -73,7 +73,7 @@ def parse_bookmark_data (data):
yield url, name
def parse_bookmark_file (file):
def parse_bookmark_file(file):
"""Parse file object.
Return iterator for bookmarks of the form (url, name).
Bookmarks are not sorted.
@ -82,14 +82,14 @@ def parse_bookmark_file (file):
yield url, name
def parse_bookmark_json (data):
def parse_bookmark_json(data):
"""Parse complete JSON data for Chromium Bookmarks."""
for entry in data["roots"].values():
for url, name in parse_bookmark_node(entry):
yield url, name
def parse_bookmark_node (node):
def parse_bookmark_node(node):
"""Parse one JSON node of Chromium Bookmarks."""
if node["type"] == "url":
yield node["url"], node["name"]

View file

@ -30,7 +30,7 @@ extension = re.compile(r'/places.sqlite$', re.IGNORECASE)
# Windows filename encoding
nt_filename_encoding="mbcs"
def get_profile_dir ():
def get_profile_dir():
"""Return path where all profiles of current user are stored."""
if os.name == 'nt':
basedir = unicode(os.environ["APPDATA"], nt_filename_encoding)
@ -40,7 +40,7 @@ def get_profile_dir ():
return dirpath
def find_bookmark_file (profile="*.default"):
def find_bookmark_file(profile="*.default"):
"""Return the first found places.sqlite file of the profile directories
ending with '.default' (or another given profile name).
Returns absolute filename if found, or empty string if no bookmark file
@ -57,7 +57,7 @@ def find_bookmark_file (profile="*.default"):
return ""
def parse_bookmark_file (filename):
def parse_bookmark_file(filename):
"""Return iterator for bookmarks of the form (url, name).
Bookmarks are not sorted.
Returns None if sqlite3 module is not installed.

View file

@ -25,7 +25,7 @@ OperaBookmarkFiles = (
)
def get_profile_dir ():
def get_profile_dir():
"""Return path where all profiles of current user are stored."""
if os.name == 'nt':
basedir = unicode(os.environ["APPDATA"], nt_filename_encoding)
@ -35,7 +35,7 @@ def get_profile_dir ():
return dirpath
def find_bookmark_file ():
def find_bookmark_file():
"""Return the bookmark file of the Opera profile.
Returns absolute filename if found, or empty string if no bookmark file
could be found.
@ -52,7 +52,7 @@ def find_bookmark_file ():
return ""
def parse_bookmark_data (data):
def parse_bookmark_data(data):
"""Return iterator for bookmarks of the form (url, name, line number).
Bookmarks are not sorted.
"""

View file

@ -24,12 +24,12 @@ except ImportError:
has_biplist = False
def get_profile_dir ():
def get_profile_dir():
"""Return path where all profiles of current user are stored."""
return os.path.join(os.environ["HOME"], "Library", "Safari")
def find_bookmark_file ():
def find_bookmark_file():
"""Return the bookmark file of the Default profile.
Returns absolute filename if found, or empty string if no bookmark file
could be found.
@ -47,21 +47,21 @@ def find_bookmark_file ():
return ""
def parse_bookmark_file (filename):
def parse_bookmark_file(filename):
"""Return iterator for bookmarks of the form (url, name).
Bookmarks are not sorted.
"""
return parse_plist(get_plist_data_from_file(filename))
def parse_bookmark_data (data):
def parse_bookmark_data(data):
"""Return iterator for bookmarks of the form (url, name).
Bookmarks are not sorted.
"""
return parse_plist(get_plist_data_from_string(data))
def get_plist_data_from_file (filename):
def get_plist_data_from_file(filename):
"""Parse plist data for a file. Tries biplist, falling back to
plistlib."""
if has_biplist:
@ -74,7 +74,7 @@ def get_plist_data_from_file (filename):
return {}
def get_plist_data_from_string (data):
def get_plist_data_from_string(data):
"""Parse plist data for a string. Tries biplist, falling back to
plistlib."""
if has_biplist:
@ -105,11 +105,11 @@ def parse_plist(entry):
yield item
def is_leaf (entry):
def is_leaf(entry):
"""Return true if plist entry is an URL entry."""
return entry.get(KEY_WEBBOOKMARKTYPE) == 'WebBookmarkTypeLeaf'
def has_children (entry):
def has_children(entry):
"""Return true if plist entry has children."""
return entry.get(KEY_WEBBOOKMARKTYPE) == 'WebBookmarkTypeList'

View file

@ -33,7 +33,7 @@ class RobotsTxt:
format: {cache key (string) -> robots.txt content (RobotFileParser)}
"""
def __init__ (self, useragent):
def __init__(self, useragent):
"""Initialize per-URL robots.txt cache."""
# mapping {URL -> parsed robots.txt}
self.cache = LFUCache(size=100)
@ -41,13 +41,13 @@ class RobotsTxt:
self.roboturl_locks = {}
self.useragent = useragent
def allows_url (self, url_data):
def allows_url(self, url_data):
"""Ask robots.txt allowance."""
roboturl = url_data.get_robots_txt_url()
with self.get_lock(roboturl):
return self._allows_url(url_data, roboturl)
def _allows_url (self, url_data, roboturl):
def _allows_url(self, url_data, roboturl):
"""Ask robots.txt allowance. Assumes only single thread per robots.txt
URL calls this function."""
with cache_lock:

View file

@ -37,7 +37,7 @@ class UrlQueue:
"""A queue supporting several consumer tasks. The task_done() idea is
from the Python 2.5 implementation of Queue.Queue()."""
def __init__ (self, max_allowed_urls=None):
def __init__(self, max_allowed_urls=None):
"""Initialize the queue state and task counters."""
# Note: don't put a maximum size on the queue since it would
# lead to deadlocks when all worker threads called put().
@ -62,31 +62,31 @@ class UrlQueue:
self.max_allowed_urls = max_allowed_urls
self.num_puts = 0
def qsize (self):
def qsize(self):
"""Return the approximate size of the queue (not reliable!)."""
with self.mutex:
return len(self.queue)
def empty (self):
def empty(self):
"""Return True if the queue is empty, False otherwise.
Result is thread-safe, but not reliable since the queue could have
been changed before the result is returned!"""
with self.mutex:
return self._empty()
def _empty (self):
def _empty(self):
"""Return True if the queue is empty, False otherwise.
Not thread-safe!"""
return not self.queue
def get (self, timeout=None):
def get(self, timeout=None):
"""Get first not-in-progress url from the queue and
return it. If no such url is available return None.
"""
with self.not_empty:
return self._get(timeout)
def _get (self, timeout):
def _get(self, timeout):
"""Non thread-safe utility function of self.get() doing the real
work."""
if timeout is None:
@ -104,7 +104,7 @@ class UrlQueue:
self.in_progress += 1
return self.queue.popleft()
def put (self, item):
def put(self, item):
"""Put an item into the queue.
Block if necessary until a free slot is available.
"""
@ -112,7 +112,7 @@ class UrlQueue:
self._put(item)
self.not_empty.notify()
def _put (self, url_data):
def _put(self, url_data):
"""Put URL in queue, increase number of unfinished tasks."""
if self.shutdown or self.max_allowed_urls == 0:
return
@ -154,7 +154,7 @@ class UrlQueue:
self.queue.rotate(pos)
self.queue.appendleft(item)
def task_done (self, url_data):
def task_done(self, url_data):
"""
Indicate that a formerly enqueued task is complete.
@ -179,7 +179,7 @@ class UrlQueue:
raise ValueError('task_done() called too many times')
self.all_tasks_done.notifyAll()
def join (self, timeout=None):
def join(self, timeout=None):
"""Blocks until all items in the Queue have been gotten and processed.
The count of unfinished tasks goes up whenever an item is added to the
@ -202,7 +202,7 @@ class UrlQueue:
raise Timeout()
self.all_tasks_done.wait(remaining)
def do_shutdown (self):
def do_shutdown(self):
"""Shutdown the queue by not accepting any more URLs."""
with self.mutex:
unfinished = self.unfinished_tasks - len(self.queue)
@ -214,7 +214,7 @@ class UrlQueue:
self.unfinished_tasks = unfinished
self.shutdown = True
def status (self):
def status(self):
"""Get tuple (finished tasks, in progress, queue size)."""
# no need to acquire self.mutex since the numbers are unreliable anyways.
return (self.finished_tasks, self.in_progress, len(self.queue))

View file

@ -43,7 +43,7 @@ def guess_url(url):
return url
def absolute_url (base_url, base_ref, parent_url):
def absolute_url(base_url, base_ref, parent_url):
"""
Search for the absolute url to detect the link type. This does not
join any url fragments together!
@ -64,7 +64,7 @@ def absolute_url (base_url, base_ref, parent_url):
return ""
def get_url_from (base_url, recursion_level, aggregate,
def get_url_from(base_url, recursion_level, aggregate,
parent_url=None, base_ref=None, line=None, column=None,
page=0, name="", parent_content_type=None, extern=None, url_encoding=None):
"""
@ -125,7 +125,7 @@ def get_url_from (base_url, recursion_level, aggregate,
line=line, column=column, page=page, name=name, extern=extern, url_encoding=url_encoding)
def get_urlclass_from (scheme, assume_local_file=False):
def get_urlclass_from(scheme, assume_local_file=False):
"""Return checker class for given URL scheme. If the scheme
cannot be matched and assume_local_file is True, assume a local file.
"""
@ -154,7 +154,7 @@ def get_urlclass_from (scheme, assume_local_file=False):
return klass
def get_index_html (urls):
def get_index_html(urls):
"""
Construct artificial index.html from given URLs.

View file

@ -22,12 +22,12 @@ import socket
from . import urlbase
class DnsUrl (urlbase.UrlBase):
class DnsUrl(urlbase.UrlBase):
"""
Url link with dns scheme.
"""
def can_get_content (self):
def can_get_content(self):
"""
dns: URLs do not have any content

View file

@ -30,7 +30,7 @@ from ..bookmarks import firefox
from .const import WARN_FILE_MISSING_SLASH, WARN_FILE_SYSTEM_PATH
def get_files (dirname):
def get_files(dirname):
"""Get iterator of entries in directory. Only allows regular files
and directories, no symlinks."""
for entry in os.listdir(dirname):
@ -43,7 +43,7 @@ def get_files (dirname):
yield entry+"/"
def prepare_urlpath_for_nt (path):
def prepare_urlpath_for_nt(path):
"""
URLs like 'file://server/path/' result in a path named '/server/path'.
However urllib.url2pathname expects '////server/path'.
@ -53,7 +53,7 @@ def prepare_urlpath_for_nt (path):
return path
def get_nt_filename (path):
def get_nt_filename(path):
"""Return case sensitive filename for NT path."""
unc, rest = os.path.splitunc(path)
head, tail = os.path.split(rest)
@ -66,7 +66,7 @@ def get_nt_filename (path):
return path
def get_os_filename (path):
def get_os_filename(path):
"""Return filesystem path for given URL path."""
if os.name == 'nt':
path = prepare_urlpath_for_nt(path)
@ -77,7 +77,7 @@ def get_os_filename (path):
return res
def is_absolute_path (path):
def is_absolute_path(path):
"""Check if given path is absolute. On Windows absolute paths start
with a drive letter. On all other systems absolute paths start with
a slash."""
@ -88,12 +88,12 @@ def is_absolute_path (path):
return path.startswith("/")
class FileUrl (urlbase.UrlBase):
class FileUrl(urlbase.UrlBase):
"""
Url link with file scheme.
"""
def init (self, base_ref, base_url, parent_url, recursion_level,
def init(self, base_ref, base_url, parent_url, recursion_level,
aggregate, line, column, page, name, url_encoding, extern):
"""Initialize the scheme."""
super(FileUrl, self).init(base_ref, base_url, parent_url,
@ -128,7 +128,7 @@ class FileUrl (urlbase.UrlBase):
base_url = re.sub("^file://([^/])", r"file:///\1", base_url)
self.base_url = base_url
def build_url (self):
def build_url(self):
"""
Calls super.build_url() and adds a trailing slash to directories.
"""
@ -154,7 +154,7 @@ class FileUrl (urlbase.UrlBase):
self.urlparts[2] += '/'
self.url = urlutil.urlunsplit(self.urlparts)
def add_size_info (self):
def add_size_info(self):
"""Get size of file content and modification time from filename path."""
if self.is_directory():
# Directory size always differs from the customer index.html
@ -164,7 +164,7 @@ class FileUrl (urlbase.UrlBase):
self.size = fileutil.get_size(filename)
self.modified = datetime.utcfromtimestamp(fileutil.get_mtime(filename))
def check_connection (self):
def check_connection(self):
"""
Try to open the local file. Under NT systems the case sensitivity
is checked.
@ -180,7 +180,7 @@ class FileUrl (urlbase.UrlBase):
self.url_connection = urllib.request.urlopen(url)
self.check_case_sensitivity()
def check_case_sensitivity (self):
def check_case_sensitivity(self):
"""
Check if url and windows path name match cases
else there might be problems when copying such
@ -197,7 +197,7 @@ class FileUrl (urlbase.UrlBase):
{"path": path, "realpath": realpath},
tag=WARN_FILE_SYSTEM_PATH)
def read_content (self):
def read_content(self):
"""Return file content, or in case of directories a dummy HTML file
with links to the files."""
if self.is_directory():
@ -208,7 +208,7 @@ class FileUrl (urlbase.UrlBase):
data = super(FileUrl, self).read_content()
return data
def get_os_filename (self):
def get_os_filename(self):
"""
Construct os specific file path out of the file:// URL.
@ -217,11 +217,11 @@ class FileUrl (urlbase.UrlBase):
"""
return get_os_filename(self.urlparts[2])
def get_temp_filename (self):
def get_temp_filename(self):
"""Get filename for content to parse."""
return self.get_os_filename()
def is_directory (self):
def is_directory(self):
"""
Check if file is a directory.
@ -231,7 +231,7 @@ class FileUrl (urlbase.UrlBase):
filename = self.get_os_filename()
return os.path.isdir(filename) and not os.path.islink(filename)
def is_parseable (self):
def is_parseable(self):
"""Check if content is parseable for recursion.
@return: True if content is parseable
@ -246,7 +246,7 @@ class FileUrl (urlbase.UrlBase):
log.debug(LOG_CHECK, "File with content type %r is not parseable.", self.content_type)
return False
def set_content_type (self):
def set_content_type(self):
"""Return URL content type, or an empty string if content
type could not be found."""
if self.url:
@ -254,7 +254,7 @@ class FileUrl (urlbase.UrlBase):
else:
self.content_type = ""
def get_intern_pattern (self, url=None):
def get_intern_pattern(self, url=None):
"""Get pattern for intern URL matching.
@return non-empty regex pattern or None
@ -271,7 +271,7 @@ class FileUrl (urlbase.UrlBase):
url = url[:i+1]
return re.escape(url)
def add_url (self, url, line=0, column=0, page=0, name="", base=None):
def add_url(self, url, line=0, column=0, page=0, name="", base=None):
"""If a local webroot directory is configured, replace absolute URLs
with it. After that queue the URL data for checking."""
webroot = self.aggregate.config["localwebroot"]

View file

@ -25,12 +25,12 @@ from . import proxysupport, httpurl, internpaturl, get_index_html
from .const import WARN_FTP_MISSING_SLASH
class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
class FtpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
"""
Url link with ftp scheme.
"""
def reset (self):
def reset(self):
"""
Initialize FTP url data.
"""
@ -41,7 +41,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
self.filename = None
self.filename_encoding = 'iso-8859-1'
def check_connection (self):
def check_connection(self):
"""
In case of proxy, delegate to HttpUrl. Else check in this
order: login, changing directory, list the file.
@ -67,7 +67,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
self.files = []
return None
def login (self):
def login(self):
"""Log into ftp server and check the welcome message."""
self.url_connection = ftplib.FTP(timeout=self.aggregate.config["timeout"])
if log.is_debug(LOG_CHECK):
@ -93,7 +93,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
raise LinkCheckerError(
_("Remote host has closed connection: %(msg)s") % str(msg))
def negotiate_encoding (self):
def negotiate_encoding(self):
"""Check if server can handle UTF-8 encoded filenames.
See also RFC 2640."""
try:
@ -106,7 +106,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
if " UTF-8" in features.splitlines():
self.filename_encoding = "utf-8"
def cwd (self):
def cwd(self):
"""
Change to URL parent directory. Return filename of last path
component.
@ -122,7 +122,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
self.url_connection.cwd(d)
return filename
def listfile (self):
def listfile(self):
"""
See if filename is in the current FTP directory.
"""
@ -143,11 +143,11 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
return
raise ftplib.error_perm("550 File not found")
def get_files (self):
def get_files(self):
"""Get list of filenames in directory. Subdirectories have an
ending slash."""
files = []
def add_entry (line):
def add_entry(line):
"""Parse list line and add the entry it points to to the file
list."""
log.debug(LOG_CHECK, "Directory entry %r", line)
@ -162,7 +162,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
self.url_connection.dir(add_entry)
return files
def is_parseable (self):
def is_parseable(self):
"""See if URL target is parseable for recursion."""
if self.is_directory():
return True
@ -171,18 +171,18 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
log.debug(LOG_CHECK, "URL with content type %r is not parseable.", self.content_type)
return False
def is_directory (self):
def is_directory(self):
"""See if URL target is a directory."""
# either the path is empty, or ends with a slash
path = self.urlparts[2]
return (not path) or path.endswith('/')
def set_content_type (self):
def set_content_type(self):
"""Set URL content type, or an empty string if content
type could not be found."""
self.content_type = mimeutil.guess_mimetype(self.url, read=self.get_content)
def read_content (self):
def read_content(self):
"""Return URL target content, or in case of directories a dummy HTML
file with links to the files."""
if self.is_directory():
@ -194,7 +194,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
# download file in BINARY mode
ftpcmd = "RETR %s" % self.filename
buf = StringIO()
def stor_data (s):
def stor_data(s):
"""Helper method storing given data"""
# limit the download size
if (buf.tell() + len(s)) > self.max_size:
@ -205,7 +205,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
buf.close()
return data
def close_connection (self):
def close_connection(self):
"""Release the open connection from the connection pool."""
if self.url_connection is not None:
try:

View file

@ -45,12 +45,13 @@ unicode_safe = strformat.unicode_safe
# match for robots meta element content attribute
nofollow_re = re.compile(r"\bnofollow\b", re.IGNORECASE)
class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
class HttpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
"""
Url link with http scheme.
"""
def reset (self):
def reset(self):
"""
Initialize HTTP specific variables.
"""
@ -62,7 +63,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
self.ssl_cipher = None
self.ssl_cert = None
def allows_robots (self, url):
def allows_robots(self, url):
"""
Fetch and parse the robots.txt of given url. Checks if LinkChecker
can get the requested resource content.
@ -74,7 +75,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
"""
return not self.aggregate.config['robotstxt'] or self.aggregate.robots_txt.allows_url(self)
def content_allows_robots (self):
def content_allows_robots(self):
"""
Return False if the content of this URL forbids robots to
search for recursive links.
@ -85,7 +86,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
soup = self.get_soup()
return not soup.find("meta", attrs={"name": "robots", "content": nofollow_re})
def add_size_info (self):
def add_size_info(self):
"""Get size of URL content from HTTP header."""
if self.headers and "Content-Length" in self.headers and \
"Transfer-Encoding" not in self.headers:
@ -98,7 +99,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
else:
self.size = -1
def check_connection (self):
def check_connection(self):
"""
Check a URL with HTTP protocol.
Here is an excerpt from RFC 1945 with common response codes:
@ -204,7 +205,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
else:
self.ssl_cert = None
def construct_auth (self):
def construct_auth(self):
"""Construct HTTP Basic authentication credentials if there
is user/password information available. Does not overwrite if
credentials have already been constructed."""
@ -214,7 +215,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
if _user is not None and _password is not None:
self.auth = (_user, _password)
def set_content_type (self):
def set_content_type(self):
"""Return content MIME type or empty string."""
self.content_type = httputil.get_content_type(self.headers)
@ -269,7 +270,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
# run connection plugins for old connection
self.aggregate.plugin_manager.run_connection_plugins(self)
def getheader (self, name, default=None):
def getheader(self, name, default=None):
"""Get decoded header value.
@return: decoded header value or default of not found
@ -280,7 +281,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
return default
return unicode_safe(value, encoding=HEADER_ENCODING)
def check_response (self):
def check_response(self):
"""Check final result and log it."""
if self.url_connection.status_code >= 400:
self.set_result("%d %s" % (self.url_connection.status_code, self.url_connection.reason),
@ -332,7 +333,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
name = "Content-Location: header"
self.add_url(url, name=name)
def is_parseable (self):
def is_parseable(self):
"""
Check if content is parseable for recursion.
@ -352,7 +353,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
return False
return True
def get_robots_txt_url (self):
def get_robots_txt_url(self):
"""
Get the according robots.txt URL for this URL.

View file

@ -19,9 +19,9 @@ Handle ignored URLs.
from . import unknownurl
class IgnoreUrl (unknownurl.UnknownUrl):
class IgnoreUrl(unknownurl.UnknownUrl):
"""Always ignored URL."""
def is_ignored (self):
def is_ignored(self):
"""Return True if this URL scheme is ignored."""
return True

View file

@ -21,7 +21,7 @@ from . import urlbase, absolute_url
from .. import strformat, url as urlutil
def get_intern_pattern (url):
def get_intern_pattern(url):
"""Return intern pattern for given URL. Redirections to the same
domain with or without "www." prepended are allowed."""
parts = strformat.url_unicode_split(url)
@ -45,10 +45,10 @@ def get_intern_pattern (url):
return "^%s://%s%s" % tuple(args)
class InternPatternUrl (urlbase.UrlBase):
class InternPatternUrl(urlbase.UrlBase):
"""Class supporting an intern URL pattern."""
def get_intern_pattern (self, url=None):
def get_intern_pattern(self, url=None):
"""
Get pattern for intern URL matching.

View file

@ -28,7 +28,7 @@ from ..network import iputil
from .const import WARN_MAIL_NO_MX_HOST
def getaddresses (addr):
def getaddresses(addr):
"""Return list of email addresses from given field value."""
parsed = [mail for name, mail in AddressList(addr).addresslist if mail]
if parsed:
@ -41,19 +41,19 @@ def getaddresses (addr):
return addresses
def is_quoted (addr):
def is_quoted(addr):
"""Return True iff mail address string is quoted."""
return addr.startswith('"') and addr.endswith('"')
def is_literal (domain):
def is_literal(domain):
"""Return True iff domain string is a literal."""
return domain.startswith('[') and domain.endswith(']')
_remove_quoted = re.compile(r'\\.').sub
_quotes = re.compile(r'["\\]')
def is_missing_quote (addr):
def is_missing_quote(addr):
"""Return True iff mail address is not correctly quoted."""
return _quotes.match(_remove_quoted("", addr[1:-1]))
@ -62,12 +62,12 @@ def is_missing_quote (addr):
EMAIL_CGI_ADDRESS = ("to", "cc", "bcc")
EMAIL_CGI_SUBJECT = "subject"
class MailtoUrl (urlbase.UrlBase):
class MailtoUrl(urlbase.UrlBase):
"""
Url link with mailto scheme.
"""
def build_url (self):
def build_url(self):
"""Call super.build_url(), extract list of mail addresses from URL,
and check their syntax.
"""
@ -84,7 +84,7 @@ class MailtoUrl (urlbase.UrlBase):
self.add_warning(_("No mail addresses or email subject found in `%(url)s'.") % \
{"url": self.url})
def parse_addresses (self):
def parse_addresses(self):
"""Parse all mail addresses out of the URL target. Also parses
optional CGI headers like "?to=foo@example.org".
Stores parsed addresses in the self.addresses set.
@ -127,7 +127,7 @@ class MailtoUrl (urlbase.UrlBase):
self.addresses.update(getaddresses(url))
log.debug(LOG_CHECK, "addresses: %s", self.addresses)
def check_email_syntax (self, mail):
def check_email_syntax(self, mail):
"""Check email syntax. The relevant RFCs:
- How to check names (memo):
http://tools.ietf.org/html/rfc3696
@ -220,7 +220,7 @@ class MailtoUrl (urlbase.UrlBase):
{"addr": mail}, valid=False, overwrite=False)
return
def check_connection (self):
def check_connection(self):
"""
Verify a list of email addresses. If one address fails,
the whole list will fail.
@ -235,7 +235,7 @@ class MailtoUrl (urlbase.UrlBase):
if not self.valid:
break
def check_smtp_domain (self, mail):
def check_smtp_domain(self, mail):
"""
Check a single mail address.
"""
@ -292,7 +292,7 @@ class MailtoUrl (urlbase.UrlBase):
emails = ",".join(sorted(self.addresses))
self.cache_url = "%s:%s" % (self.scheme, emails)
def can_get_content (self):
def can_get_content(self):
"""
mailto: URLs do not have any content

View file

@ -28,12 +28,12 @@ from .const import WARN_NNTP_NO_SERVER, WARN_NNTP_NO_NEWSGROUP
random.seed()
class NntpUrl (urlbase.UrlBase):
class NntpUrl(urlbase.UrlBase):
"""
Url link with NNTP scheme.
"""
def check_connection (self):
def check_connection(self):
"""
Connect to NNTP server and try to request the URL article
resource (if specified).
@ -64,7 +64,7 @@ class NntpUrl (urlbase.UrlBase):
self.add_warning(_("No newsgroup specified in NNTP URL."),
tag=WARN_NNTP_NO_NEWSGROUP)
def _connect_nntp (self, nntpserver):
def _connect_nntp(self, nntpserver):
"""
This is done only once per checking task. Also, the newly
introduced error codes 504 and 505 (both inclining "Too busy, retry
@ -91,11 +91,11 @@ class NntpUrl (urlbase.UrlBase):
self.add_info(nntp.getwelcome())
return nntp
def wait (self):
def wait(self):
"""Wait some time before trying to connect again."""
time.sleep(random.randrange(10, 30))
def can_get_content (self):
def can_get_content(self):
"""
NNTP urls have no content.

View file

@ -25,7 +25,7 @@ from .. import LinkCheckerError, log, LOG_CHECK, url as urlutil, httputil
class ProxySupport:
"""Get support for proxying and for URLs with user:pass@host setting."""
def set_proxy (self, proxy):
def set_proxy(self, proxy):
"""Parse given proxy information and store parsed values.
Note that only http:// proxies are supported, both for ftp://
and http:// URLs.
@ -60,7 +60,7 @@ class ProxySupport:
auth = "%s:%s" % (username, password)
self.proxyauth = "Basic "+httputil.encode_base64(auth)
def ignore_proxy_host (self):
def ignore_proxy_host(self):
"""Check if self.host is in the $no_proxy ignore list."""
if urllib.request.proxy_bypass(self.host):
return True

View file

@ -27,12 +27,12 @@ def encode(s, encoding="iso-8859-1", errors="ignore"):
return s.encode(encoding, errors)
class TelnetUrl (urlbase.UrlBase):
class TelnetUrl(urlbase.UrlBase):
"""
Url link with telnet scheme.
"""
def build_url (self):
def build_url(self):
"""
Call super.build_url(), set default telnet port and initialize
the login credentials.
@ -44,7 +44,7 @@ class TelnetUrl (urlbase.UrlBase):
# set user/pass
self.user, self.password = self.get_user_password()
def local_check (self):
def local_check(self):
"""
Warn about empty host names. Else call super.local_check().
"""
@ -53,7 +53,7 @@ class TelnetUrl (urlbase.UrlBase):
return
super(TelnetUrl, self).local_check()
def check_connection (self):
def check_connection(self):
"""
Open a telnet connection and try to login. Expected login
label is "login: ", expected password label is "Password: ".
@ -71,7 +71,7 @@ class TelnetUrl (urlbase.UrlBase):
# XXX how to tell if we are logged in??
self.url_connection.write(b"exit\n")
def can_get_content (self):
def can_get_content(self):
"""
Telnet URLs have no content.

View file

@ -21,10 +21,10 @@ import re
from . import urlbase
class UnknownUrl (urlbase.UrlBase):
class UnknownUrl(urlbase.UrlBase):
"""Handle unknown or just plain broken URLs."""
def build_url (self):
def build_url(self):
"""Only logs that this URL is unknown."""
super(UnknownUrl, self).build_url()
if self.is_ignored():
@ -35,11 +35,11 @@ class UnknownUrl (urlbase.UrlBase):
self.set_result(_("URL is unrecognized or has invalid syntax"),
valid=False)
def is_ignored (self):
def is_ignored(self):
"""Return True if this URL scheme is ignored."""
return is_unknown_scheme(self.scheme)
def can_get_content (self):
def can_get_content(self):
"""Unknown URLs have no content.
@return: False

View file

@ -45,7 +45,7 @@ unicode_safe = strformat.unicode_safe
# schemes that are invalid with an empty hostname
scheme_requires_host = ("ftp", "http", "telnet")
def urljoin (parent, url):
def urljoin(parent, url):
"""
If url is relative, join parent and url. Else leave url as-is.
@ -56,7 +56,7 @@ def urljoin (parent, url):
return urllib.parse.urljoin(parent, url)
def url_norm (url, encoding):
def url_norm(url, encoding):
"""Wrapper for url.url_norm() to convert UnicodeError in
LinkCheckerError."""
try:
@ -95,7 +95,7 @@ class UrlBase:
# Read in 16kb chunks
ReadChunkBytes = 1024*16
def __init__ (self, base_url, recursion_level, aggregate,
def __init__(self, base_url, recursion_level, aggregate,
parent_url=None, base_ref=None, line=-1, column=-1, page=-1,
name="", url_encoding=None, extern=None):
"""
@ -126,7 +126,7 @@ class UrlBase:
if not self.has_result:
self.set_result(_("filtered"))
def init (self, base_ref, base_url, parent_url, recursion_level,
def init(self, base_ref, base_url, parent_url, recursion_level,
aggregate, line, column, page, name, url_encoding, extern):
"""
Initialize internal data.
@ -162,7 +162,7 @@ class UrlBase:
self.add_warning(_("Leading or trailing whitespace in URL `%(url)s'.") %
{"url": base_url}, tag=WARN_URL_WHITESPACE)
def reset (self):
def reset(self):
"""
Reset all variables to default values.
"""
@ -215,7 +215,7 @@ class UrlBase:
# URLs seen through redirections
self.aliases = []
def set_result (self, msg, valid=True, overwrite=False):
def set_result(self, msg, valid=True, overwrite=False):
"""
Set result string and validity.
"""
@ -233,7 +233,7 @@ class UrlBase:
# free content data
self.data = None
def get_title (self):
def get_title(self):
"""Return title of page the URL refers to.
This is per default the filename or the URL."""
if self.title is None:
@ -249,17 +249,17 @@ class UrlBase:
self.title = title
return self.title
def is_parseable (self):
def is_parseable(self):
"""
Return True iff content of this url is parseable.
"""
return False
def is_html (self):
def is_html(self):
"""Return True iff content of this url is HTML formatted."""
return self._is_ctype("html")
def is_css (self):
def is_css(self):
"""Return True iff content of this url is CSS stylesheet."""
return self._is_ctype("css")
@ -270,11 +270,11 @@ class UrlBase:
mime = self.content_type
return self.ContentMimetypes.get(mime) == ctype
def is_http (self):
def is_http(self):
"""Return True for http:// or https:// URLs."""
return self.scheme in ("http", "https")
def is_file (self):
def is_file(self):
"""Return True for file:// URLs."""
return self.scheme == "file"
@ -286,7 +286,7 @@ class UrlBase:
"""Return True for local (ie. file://) URLs."""
return self.is_file()
def add_warning (self, s, tag=None):
def add_warning(self, s, tag=None):
"""
Add a warning string.
"""
@ -295,14 +295,14 @@ class UrlBase:
tag not in self.aggregate.config["ignorewarnings"]:
self.warnings.append(item)
def add_info (self, s):
def add_info(self, s):
"""
Add an info string.
"""
if s not in self.info:
self.info.append(s)
def set_cache_url (self):
def set_cache_url(self):
"""Set the URL to be used for caching."""
# remove anchor from cached target url since we assume
# URLs with different anchors to have the same content
@ -310,7 +310,7 @@ class UrlBase:
if self.cache_url is not None:
assert isinstance(self.cache_url, str_text), repr(self.cache_url)
def check_syntax (self):
def check_syntax(self):
"""
Called before self.check(), this function inspects the
url syntax. Success enables further checking, failure
@ -343,7 +343,7 @@ class UrlBase:
args = dict(len=len(self.url), max=URL_MAX_LENGTH)
self.add_warning(_("URL length %(len)d is longer than %(max)d.") % args, tag=WARN_URL_TOO_LONG)
def build_url (self):
def build_url(self):
"""
Construct self.url and self.urlparts out of the given base
url information self.base_url, self.parent_url and self.base_ref.
@ -378,7 +378,7 @@ class UrlBase:
# and unsplit again
self.url = urlutil.urlunsplit(self.urlparts)
def build_url_parts (self):
def build_url_parts(self):
"""Set userinfo, host, port and anchor from self.urlparts.
Also checks for obfuscated IP addresses.
"""
@ -409,7 +409,7 @@ class UrlBase:
if self.anchor is not None:
assert isinstance(self.anchor, str_text), repr(self.anchor)
def check_obfuscated_ip (self):
def check_obfuscated_ip(self):
"""Warn if host of this URL is obfuscated IP address."""
# check if self.host can be an IP address
# check for obfuscated IP address
@ -422,7 +422,7 @@ class UrlBase:
{"url": self.base_url, "ip": ips[0]},
tag=WARN_URL_OBFUSCATED_IP)
def check (self):
def check(self):
"""Main check function for checking this URL."""
if self.aggregate.config["trace"]:
trace.trace_on()
@ -437,7 +437,7 @@ class UrlBase:
else:
raise
def local_check (self):
def local_check(self):
"""Local check function can be overridden in subclasses."""
log.debug(LOG_CHECK, "Checking %s", str_text(self))
# strict extern URLs should not be checked
@ -476,7 +476,7 @@ class UrlBase:
{"msg": str_text(value)}, tag=WARN_URL_ERROR_GETTING_CONTENT)
return False
def close_connection (self):
def close_connection(self):
"""
Close an opened url connection.
"""
@ -490,7 +490,7 @@ class UrlBase:
pass
self.url_connection = None
def handle_exception (self):
def handle_exception(self):
"""
An exception occurred. Log it and set the cache flag.
"""
@ -510,14 +510,14 @@ class UrlBase:
# limit length to 240
return strformat.limit(errmsg, length=240)
def check_connection (self):
def check_connection(self):
"""
The basic connection check uses urlopen to initialize
a connection object.
"""
self.url_connection = urlopen(self.url)
def add_size_info (self):
def add_size_info(self):
"""Set size of URL content (if any)..
Should be overridden in subclasses."""
maxbytes = self.aggregate.config["maxfilesizedownload"]
@ -539,7 +539,7 @@ class UrlBase:
return False
return True
def allows_recursion (self):
def allows_recursion(self):
"""
Return True iff we can recurse into the url's content.
"""
@ -568,7 +568,7 @@ class UrlBase:
"""Returns True: only check robots.txt on HTTP links."""
return True
def set_extern (self, url):
def set_extern(self, url):
"""
Match URL against extern and intern link patterns. If no pattern
matches the URL is extern. Sets self.extern to a tuple (bool,
@ -600,12 +600,12 @@ class UrlBase:
else:
self.extern = (1, 1)
def set_content_type (self):
def set_content_type(self):
"""Set content MIME type.
Should be overridden in subclasses."""
pass
def can_get_content (self):
def can_get_content(self):
"""Indicate wether url get_content() can be called."""
return self.size <= self.aggregate.config["maxfilesizedownload"]
@ -632,7 +632,7 @@ class UrlBase:
self.data = self.download_content()
return self.data
def get_content (self):
def get_content(self):
if self.text is None:
self.get_raw_content()
self.soup = htmlsoup.make_soup(self.data)
@ -657,7 +657,7 @@ class UrlBase:
"""
return self.url_connection.read(self.ReadChunkBytes)
def get_user_password (self):
def get_user_password(self):
"""Get tuple (user, password) from configured authentication.
Both user and password can be None.
"""
@ -666,7 +666,7 @@ class UrlBase:
return urllib.parse.splitpasswd(self.userinfo)
return self.aggregate.config.get_user_password(self.url)
def add_url (self, url, line=0, column=0, page=0, name="", base=None):
def add_url(self, url, line=0, column=0, page=0, name="", base=None):
"""Add new URL to queue."""
if base:
base_ref = urlutil.url_norm(base, encoding=self.encoding)[0]
@ -677,7 +677,7 @@ class UrlBase:
page=page, name=name, parent_content_type=self.content_type, url_encoding=self.encoding)
self.aggregate.urlqueue.put(url_data)
def serialized (self, sep=os.linesep):
def serialized(self, sep=os.linesep):
"""
Return serialized url check data as unicode string.
"""
@ -696,7 +696,7 @@ class UrlBase:
"cache_url=%s" % self.cache_url,
])
def get_intern_pattern (self, url=None):
def get_intern_pattern(self, url=None):
"""Get pattern for intern URL matching.
@param url: the URL to set intern pattern for, else self.url
@ -737,7 +737,7 @@ class UrlBase:
s = str_text(self)
return self.aggregate.config['logger'].encode(s)
def __repr__ (self):
def __repr__(self):
"""
Get URL info.
@ -746,7 +746,7 @@ class UrlBase:
"""
return "<%s>" % self.serialized(sep=", ")
def to_wire_dict (self):
def to_wire_dict(self):
"""Return a simplified transport object for logging and caching.
The transport object must contain these attributes:
@ -813,7 +813,7 @@ class UrlBase:
modified=self.modified,
)
def to_wire (self):
def to_wire(self):
"""Return compact UrlData object with information from to_wire_dict().
"""
return CompactUrlData(self.to_wire_dict())

View file

@ -53,7 +53,7 @@ def print_plugins(folders, exit_code=0):
sys.exit(exit_code)
def print_usage (msg, exit_code=2):
def print_usage(msg, exit_code=2):
"""Print a program msg text to stderr and exit."""
program = sys.argv[0]
print(_("Error: %(msg)s") % {"msg": msg}, file=console.stderr)
@ -61,7 +61,7 @@ def print_usage (msg, exit_code=2):
sys.exit(exit_code)
def aggregate_url (aggregate, url, err_exit_code=2):
def aggregate_url(aggregate, url, err_exit_code=2):
"""Append given commandline URL to input queue."""
get_url_from = checker.get_url_from
url = checker.guess_url(url)

View file

@ -49,7 +49,7 @@ under certain conditions. Look at the file `LICENSE' within this
distribution."""
Portable = configdata.portable
def normpath (path):
def normpath(path):
"""Norm given system path with all available norm or expand functions
in os.path."""
expanded = os.path.expanduser(os.path.expandvars(path))
@ -87,12 +87,12 @@ def get_modules_info():
return "Modules: %s" % (", ".join(module_infos))
def get_share_dir ():
def get_share_dir():
"""Return absolute path of LinkChecker example configuration."""
return os.path.join(get_install_data(), "share", "linkchecker")
def get_share_file (filename, devel_dir=None):
def get_share_file(filename, devel_dir=None):
"""Return a filename in the share directory.
@param devel_dir: directory to search when developing
@ptype devel_dir: string
@ -144,13 +144,13 @@ def get_certifi_file():
# dynamic options
class Configuration (dict):
class Configuration(dict):
"""
Storage for configuration options. Options can both be given from
the command line as well as from configuration files.
"""
def __init__ (self):
def __init__(self):
"""
Initialize the default options.
"""
@ -210,18 +210,18 @@ class Configuration (dict):
"""Set the status logger."""
self.status_logger = status_logger
def logger_new (self, loggername, **kwargs):
def logger_new(self, loggername, **kwargs):
"""Instantiate new logger and return it."""
args = self[loggername]
args.update(kwargs)
return self.loggers[loggername](**args)
def logger_add (self, loggerclass):
def logger_add(self, loggerclass):
"""Add a new logger type to the known loggers."""
self.loggers[loggerclass.LoggerName] = loggerclass
self[loggerclass.LoggerName] = {}
def read (self, files=None):
def read(self, files=None):
"""
Read settings from given config files.
@ -247,7 +247,7 @@ class Configuration (dict):
log.debug(LOG_CHECK, "reading configuration from %s", filtered_cfiles)
confparse.LCConfigParser(self).read(filtered_cfiles)
def add_auth (self, user=None, password=None, pattern=None):
def add_auth(self, user=None, password=None, pattern=None):
"""Add given authentication data."""
if not user or not pattern:
log.warn(LOG_CHECK,
@ -260,7 +260,7 @@ class Configuration (dict):
)
self["authentication"].append(entry)
def get_user_password (self, url):
def get_user_password(self, url):
"""Get tuple (user, password) from configured authentication
that matches the given URL.
Both user and password can be None if not specified, or no
@ -275,7 +275,7 @@ class Configuration (dict):
"""Get dict with limit per connection type."""
return {key: self['maxconnections%s' % key] for key in ('http', 'https', 'ftp')}
def sanitize (self):
def sanitize(self):
"Make sure the configuration is consistent."
if self['logger'] is None:
self.sanitize_logger()
@ -287,14 +287,14 @@ class Configuration (dict):
# set default socket timeout
socket.setdefaulttimeout(self['timeout'])
def sanitize_logger (self):
def sanitize_logger(self):
"""Make logger configuration consistent."""
if not self['output']:
log.warn(LOG_CHECK, _("activating text logger output."))
self['output'] = 'text'
self['logger'] = self.logger_new(self['output'])
def sanitize_loginurl (self):
def sanitize_loginurl(self):
"""Make login configuration consistent."""
url = self["loginurl"]
disable = False
@ -322,7 +322,7 @@ class Configuration (dict):
_("disabling login URL %(url)s.") % {"url": url})
self["loginurl"] = None
def sanitize_proxies (self):
def sanitize_proxies(self):
"""Try to read additional proxy settings which urllib does not
support."""
if os.name != 'posix':
@ -428,7 +428,7 @@ def get_user_config():
return userconf
def get_gconf_http_proxy ():
def get_gconf_http_proxy():
"""Return host:port for GConf HTTP proxy if found, else None."""
try:
import gconf
@ -449,7 +449,7 @@ def get_gconf_http_proxy ():
return None
def get_gconf_ftp_proxy ():
def get_gconf_ftp_proxy():
"""Return host:port for GConf FTP proxy if found, else None."""
try:
import gconf
@ -469,7 +469,7 @@ def get_gconf_ftp_proxy ():
return None
def get_kde_http_proxy ():
def get_kde_http_proxy():
"""Return host:port for KDE HTTP proxy if found, else None."""
config_dir = get_kde_config_dir()
if not config_dir:
@ -483,7 +483,7 @@ def get_kde_http_proxy ():
pass
def get_kde_ftp_proxy ():
def get_kde_ftp_proxy():
"""Return host:port for KDE HTTP proxy if found, else None."""
config_dir = get_kde_config_dir()
if not config_dir:
@ -527,7 +527,7 @@ def get_kde_ftp_proxy ():
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
def get_kde_config_dir ():
def get_kde_config_dir():
"""Return KDE configuration directory or None if not found."""
kde_home = get_kde_home_dir()
if not kde_home:
@ -536,12 +536,12 @@ def get_kde_config_dir ():
return kde_home_to_config(kde_home)
def kde_home_to_config (kde_home):
def kde_home_to_config(kde_home):
"""Add subdirectories for config path to KDE home directory."""
return os.path.join(kde_home, "share", "config")
def get_kde_home_dir ():
def get_kde_home_dir():
"""Return KDE home directory or None if not found."""
if os.environ.get("KDEHOME"):
kde_home = os.path.abspath(os.environ["KDEHOME"])
@ -572,7 +572,7 @@ def get_kde_home_dir ():
loc_ro = re.compile(r"\[.*\]$")
@lru_cache(1)
def read_kioslaverc (kde_config_dir):
def read_kioslaverc(kde_config_dir):
"""Read kioslaverc into data dictionary."""
data = {}
filename = os.path.join(kde_config_dir, "kioslaverc")
@ -600,14 +600,14 @@ def read_kioslaverc (kde_config_dir):
return data
def add_kde_proxy (key, value, data):
def add_kde_proxy(key, value, data):
"""Add a proxy value to data dictionary after sanity checks."""
if not value or value[:3] == "//:":
return
data[key] = value
def add_kde_setting (key, value, data):
def add_kde_setting(key, value, data):
"""Add a KDE proxy setting value to data dictionary."""
if key == "ProxyType":
mode = None
@ -641,12 +641,12 @@ def add_kde_setting (key, value, data):
# XXX todo
def split_hosts (value):
def split_hosts(value):
"""Split comma-separated host list."""
return [host for host in value.split(", ") if host]
def resolve_indirect (data, key, splithosts=False):
def resolve_indirect(data, key, splithosts=False):
"""Replace name of environment variable with its value."""
value = data[key]
env_value = os.environ.get(value)
@ -659,7 +659,7 @@ def resolve_indirect (data, key, splithosts=False):
del data[key]
def resolve_kde_settings (data):
def resolve_kde_settings(data):
"""Write final proxy configuration values in data dictionary."""
if "mode" not in data:
return

View file

@ -21,7 +21,7 @@ import os
from .. import LinkCheckerError, get_link_pat, LOG_CHECK, log, fileutil, plugins, logconf
def read_multiline (value):
def read_multiline(value):
"""Helper function reading multiline values."""
for line in value.splitlines():
line = line.strip()
@ -30,17 +30,17 @@ def read_multiline (value):
yield line
class LCConfigParser (RawConfigParser):
class LCConfigParser(RawConfigParser):
"""
Parse a LinkChecker configuration file.
"""
def __init__ (self, config):
def __init__(self, config):
"""Initialize configuration."""
super(LCConfigParser, self).__init__()
self.config = config
def read (self, files):
def read(self, files):
"""Read settings from given config files.
@raises: LinkCheckerError on syntax errors in the config file(s)
@ -61,7 +61,7 @@ class LCConfigParser (RawConfigParser):
raise LinkCheckerError(
_("Error parsing configuration: %s") % str(msg))
def read_string_option (self, section, option, allowempty=False):
def read_string_option(self, section, option, allowempty=False):
"""Read a string option."""
if self.has_option(section, option):
value = self.get(section, option)
@ -74,7 +74,7 @@ class LCConfigParser (RawConfigParser):
if self.has_option(section, option):
self.config[option] = self.getboolean(section, option)
def read_int_option (self, section, option, key=None, min=None, max=None):
def read_int_option(self, section, option, key=None, min=None, max=None):
"""Read an integer option."""
if self.has_option(section, option):
num = self.getint(section, option)
@ -88,7 +88,7 @@ class LCConfigParser (RawConfigParser):
key = option
self.config[key] = num
def read_output_config (self):
def read_output_config(self):
"""Read configuration options in section "output"."""
section = "output"
from ..logger import LoggerClasses
@ -130,7 +130,7 @@ class LCConfigParser (RawConfigParser):
output = self.config.logger_new(val, fileoutput=1)
self.config['fileoutput'].append(output)
def read_checking_config (self):
def read_checking_config(self):
"""Read configuration options in section "checking"."""
section = "checking"
self.read_int_option(section, "threads", min=-1)
@ -157,7 +157,7 @@ class LCConfigParser (RawConfigParser):
self.read_string_option(section, "sslverify")
self.read_int_option(section, "maxrunseconds", min=0)
def read_authentication_config (self):
def read_authentication_config(self):
"""Read configuration options in section "authentication"."""
section = "authentication"
password_fields = []
@ -207,7 +207,7 @@ class LCConfigParser (RawConfigParser):
elif os.name == 'nt':
log.warn(LOG_CHECK, _("See http://support.microsoft.com/kb/308419 for more info on setting file permissions."))
def read_filtering_config (self):
def read_filtering_config(self):
"""
Read configuration options in section "filtering".
"""

View file

@ -17,17 +17,17 @@
Special container classes.
"""
class LFUCache (dict):
class LFUCache(dict):
"""Limited cache which purges least frequently used items."""
def __init__ (self, size=1000):
def __init__(self, size=1000):
"""Initialize internal LFU cache."""
super(LFUCache, self).__init__()
if size < 1:
raise ValueError("invalid cache size %d" % size)
self.size = size
def __setitem__ (self, key, val):
def __setitem__(self, key, val):
"""Store given key/value."""
if key in self:
# store value, do not increase number of uses
@ -38,7 +38,7 @@ class LFUCache (dict):
if len(self) > self.size:
self.shrink()
def shrink (self):
def shrink(self):
"""Shrink ca. 5% of entries."""
trim = int(0.05*len(self))
if trim:
@ -49,24 +49,24 @@ class LFUCache (dict):
for item in values[0:trim]:
del self[item[0]]
def __getitem__ (self, key):
def __getitem__(self, key):
"""Update key usage and return value."""
entry = super(LFUCache, self).__getitem__(key)
entry[0] += 1
return entry[1]
def uses (self, key):
def uses(self, key):
"""Get number of uses for given key (without increasing the number of
uses)"""
return super(LFUCache, self).__getitem__(key)[0]
def get (self, key, def_val=None):
def get(self, key, def_val=None):
"""Update key usage if found and return value, else return default."""
if key in self:
return self[key]
return def_val
def setdefault (self, key, def_val=None):
def setdefault(self, key, def_val=None):
"""Update key usage if found and return value, else set and return
default."""
if key in self:
@ -74,30 +74,30 @@ class LFUCache (dict):
self[key] = def_val
return def_val
def items (self):
def items(self):
"""Return list of items, not updating usage count."""
return [(key, value[1]) for key, value in super(LFUCache, self).items()]
def iteritems (self):
def iteritems(self):
"""Return iterator of items, not updating usage count."""
for key, value in super(LFUCache, self).items():
yield (key, value[1])
def values (self):
def values(self):
"""Return list of values, not updating usage count."""
return [value[1] for value in super(LFUCache, self).values()]
def itervalues (self):
def itervalues(self):
"""Return iterator of values, not updating usage count."""
for value in super(LFUCache, self).values():
yield value[1]
def popitem (self):
def popitem(self):
"""Remove and return an item."""
key, value = super(LFUCache, self).popitem()
return (key, value[1])
def pop (self):
def pop(self):
"""Remove and return a value."""
value = super(LFUCache, self).pop()
return value[1]

View file

@ -22,7 +22,7 @@ import email
import requests
def from_file (filename):
def from_file(filename):
"""Parse cookie data from a text file in HTTP header format.
@return: list of tuples (headers, scheme, host, path)
@ -43,7 +43,7 @@ def from_file (filename):
return entries
def from_headers (strheader):
def from_headers(strheader):
"""Parse cookie data from a string in HTTP header (RFC 2616) format.
@return: list of cookies

View file

@ -19,17 +19,17 @@ Simple decorators (usable in Python >= 2.4).
Example:
@synchronized(thread.allocate_lock())
def f ():
def f():
"Synchronized function"
print("i am synchronized:", f, f.__doc__)
@deprecated
def g ():
def g():
"this function is deprecated"
pass
@notimplemented
def h ():
def h():
"todo"
pass
@ -41,7 +41,7 @@ import sys
import time
def update_func_meta (fake_func, real_func):
def update_func_meta(fake_func, real_func):
"""Set meta information (eg. __doc__) of fake function to that
of the real function.
@return fake_func
@ -53,10 +53,10 @@ def update_func_meta (fake_func, real_func):
return fake_func
def deprecated (func):
def deprecated(func):
"""A decorator which can be used to mark functions as deprecated.
It emits a warning when the function is called."""
def newfunc (*args, **kwargs):
def newfunc(*args, **kwargs):
"""Print deprecated warning and execute original function."""
warnings.warn("Call to deprecated function %s." % func.__name__,
category=DeprecationWarning)
@ -64,7 +64,7 @@ def deprecated (func):
return update_func_meta(newfunc, func)
def signal_handler (signal_number):
def signal_handler(signal_number):
"""From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/410666
A decorator to set the specified function as handler for a signal.
@ -74,7 +74,7 @@ def signal_handler (signal_number):
no handler is set.
"""
# create the 'real' decorator which takes only a function as an argument
def newfunc (function):
def newfunc(function):
"""Register function as signal handler."""
# note: actually the kill(2) function uses the signal number of 0
# for a special case, but for signal(2) only positive integers
@ -86,9 +86,9 @@ def signal_handler (signal_number):
return newfunc
def synchronize (lock, func, log_duration_secs=0):
def synchronize(lock, func, log_duration_secs=0):
"""Return synchronized function acquiring the given lock."""
def newfunc (*args, **kwargs):
def newfunc(*args, **kwargs):
"""Execute function synchronized."""
t = time.time()
with lock:
@ -99,14 +99,14 @@ def synchronize (lock, func, log_duration_secs=0):
return update_func_meta(newfunc, func)
def synchronized (lock):
def synchronized(lock):
"""A decorator calling a function with aqcuired lock."""
return lambda func: synchronize(lock, func)
def notimplemented (func):
def notimplemented(func):
"""Raises a NotImplementedError if the function is called."""
def newfunc (*args, **kwargs):
def newfunc(*args, **kwargs):
"""Raise NotImplementedError"""
co = func.func_code
attrs = (co.co_name, co.co_filename, co.co_firstlineno)
@ -114,10 +114,10 @@ def notimplemented (func):
return update_func_meta(newfunc, func)
def timeit (func, log, limit):
def timeit(func, log, limit):
"""Print execution time of the function. For quick'n'dirty profiling."""
def newfunc (*args, **kwargs):
def newfunc(*args, **kwargs):
"""Execute function and print execution time."""
t = time.time()
res = func(*args, **kwargs)
@ -130,7 +130,7 @@ def timeit (func, log, limit):
return update_func_meta(newfunc, func)
def timed (log=sys.stderr, limit=2.0):
def timed(log=sys.stderr, limit=2.0):
"""Decorator to run a function with timing info."""
return lambda func: timeit(func, log, limit)
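
For context, the decorators whose signatures are reformatted above are meant to be stacked on plain functions. A small illustrative sketch, assuming the module is importable as linkcheck.decorators (the file name is not shown in this view):

    import threading

    from linkcheck.decorators import synchronized, timed  # assumed path

    _lock = threading.Lock()

    @timed()                # prints timing info for slow calls (default limit 2.0 seconds)
    @synchronized(_lock)
    def update_state():
        """Runs with _lock held and with its execution time measured."""
        pass

    update_state()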

View file

@ -24,7 +24,7 @@ from ..cache import urlqueue, robots_txt, results
from . import aggregator, console
def check_urls (aggregate):
def check_urls(aggregate):
"""Main check function; checks all configured URLs until interrupted
with Ctrl-C.
@return: None
@ -66,7 +66,7 @@ def check_urls (aggregate):
# and both should be handled by the calling layer.
def check_url (aggregate):
def check_url(aggregate):
"""Helper function waiting for URL queue."""
while True:
try:
@ -79,7 +79,7 @@ def check_url (aggregate):
break
def interrupt (aggregate):
def interrupt(aggregate):
"""Interrupt execution and shutdown, ignoring any subsequent
interrupts."""
while True:
@ -94,7 +94,7 @@ def interrupt (aggregate):
pass
def abort (aggregate):
def abort(aggregate):
"""Helper function to ensure a clean shutdown."""
while True:
try:
@ -108,7 +108,7 @@ def abort (aggregate):
abort_now()
def abort_now ():
def abort_now():
"""Force exit of current process without cleanup."""
if os.name == 'posix':
# Unix systems can use signals
@ -124,7 +124,7 @@ def abort_now ():
os._exit(3)
def get_aggregate (config):
def get_aggregate(config):
"""Get an aggregator instance with given configuration."""
_urlqueue = urlqueue.UrlQueue(max_allowed_urls=config["maxnumurls"])
_robots_txt = robots_txt.RobotsTxt(config["useragent"])
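
A rough sketch of how these director helpers fit together; the module paths and the Configuration class are assumptions, since only get_aggregate() and check_urls() appear in the hunks above:

    # illustrative only; import paths and Configuration are assumed, not shown in this diff
    from linkcheck import configuration
    from linkcheck.director import check_urls, get_aggregate

    config = configuration.Configuration()
    aggregate = get_aggregate(config)
    # ... queue the start URLs on aggregate.urlqueue here ...
    check_urls(aggregate)   # runs until the queue is empty or Ctrl-C is pressed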

View file

@ -52,7 +52,7 @@ def new_request_session(config, cookies):
class Aggregate:
"""Store thread-safe data collections for checker threads."""
def __init__ (self, config, urlqueue, robots_txt, plugin_manager,
def __init__(self, config, urlqueue, robots_txt, plugin_manager,
result_cache):
"""Store given link checking objects."""
self.config = config
@ -105,7 +105,7 @@ class Aggregate:
raise LinkCheckerError("No cookies set by login URL %s" % url)
@synchronized(_threads_lock)
def start_threads (self):
def start_threads(self):
"""Spawn threads for URL checking and status printing."""
if self.config["status"]:
t = status.Status(self, self.config["status_wait_seconds"])
@ -150,7 +150,7 @@ class Aggregate:
self.times[host] = t + wait_time
@synchronized(_threads_lock)
def print_active_threads (self):
def print_active_threads(self):
"""Log all currently active threads."""
debug = log.is_debug(LOG_CHECK)
if debug:
@ -174,11 +174,11 @@ class Aggregate:
if name.startswith("CheckThread-"):
yield name
def cancel (self):
def cancel(self):
"""Empty the URL queue."""
self.urlqueue.do_shutdown()
def abort (self):
def abort(self):
"""Print still-active URLs and empty the URL queue."""
self.print_active_threads()
self.cancel()
@ -190,12 +190,12 @@ class Aggregate:
raise KeyboardInterrupt()
@synchronized(_threads_lock)
def remove_stopped_threads (self):
def remove_stopped_threads(self):
"""Remove the stopped threads from the internal thread list."""
self.threads = [t for t in self.threads if t.is_alive()]
@synchronized(_threads_lock)
def finish (self):
def finish(self):
"""Wait for checker threads to finish."""
if not self.urlqueue.empty():
# This happens when all checker threads died.
@ -206,7 +206,7 @@ class Aggregate:
t.join(timeout=1.0)
@synchronized(_threads_lock)
def is_finished (self):
def is_finished(self):
"""Determine if checking is finished."""
self.remove_stopped_threads()
return self.urlqueue.empty() and not self.threads

View file

@ -26,7 +26,7 @@ from .. import parser
QUEUE_POLL_INTERVALL_SECS = 1.0
def check_urls (urlqueue, logger):
def check_urls(urlqueue, logger):
"""Check URLs without threading."""
while not urlqueue.empty():
url_data = urlqueue.get()
@ -80,21 +80,21 @@ def check_url(url_data, logger):
class Checker(task.LoggedCheckedTask):
"""URL check thread."""
def __init__ (self, urlqueue, logger, add_request_session):
def __init__(self, urlqueue, logger, add_request_session):
"""Store URL queue and logger."""
super(Checker, self).__init__(logger)
self.urlqueue = urlqueue
self.origname = self.getName()
self.add_request_session = add_request_session
def run_checked (self):
def run_checked(self):
"""Check URLs in the queue."""
# construct per-thread HTTP/S requests session
self.add_request_session()
while not self.stopped(0):
self.check_url()
def check_url (self):
def check_url(self):
"""Try to get URL data from queue and check it."""
try:
url_data = self.urlqueue.get(timeout=QUEUE_POLL_INTERVALL_SECS)
@ -109,7 +109,7 @@ class Checker(task.LoggedCheckedTask):
except Exception:
self.internal_error()
def check_url_data (self, url_data):
def check_url_data(self, url_data):
"""Check one URL data instance."""
if url_data.url is None:
url = ""

View file

@ -29,11 +29,11 @@ stdout = i18n.get_encoded_writer()
class StatusLogger:
"""Standard status logger. Default output is stderr."""
def __init__ (self, fd=stderr):
def __init__(self, fd=stderr):
"""Save file descriptor for logging."""
self.fd = fd
def log_status (self, checked, in_progress, queue, duration, num_urls):
def log_status(self, checked, in_progress, queue, duration, num_urls):
"""Write status message to file descriptor."""
msg = _n("%2d thread active", "%2d threads active", in_progress) % \
in_progress
@ -48,20 +48,20 @@ class StatusLogger:
self.writeln(msg)
self.flush()
def write (self, msg):
def write(self, msg):
"""Write message to file descriptor."""
self.fd.write(msg)
def writeln (self, msg):
def writeln(self, msg):
"""Write status message and line break to file descriptor."""
self.fd.write("%s%s" % (msg, os.linesep))
def flush (self):
def flush(self):
"""Flush file descriptor."""
self.fd.flush()
def internal_error (out=stderr, etype=None, evalue=None, tb=None):
def internal_error(out=stderr, etype=None, evalue=None, tb=None):
"""Print internal error message (output defaults to stderr)."""
print(os.linesep, file=out)
print(_("""********** Oops, I did it again. *************
@ -94,20 +94,20 @@ I can work with ;) .
_("******** LinkChecker internal error, over and out ********"), file=out)
def print_env_info (key, out=stderr):
def print_env_info(key, out=stderr):
"""If given environment key is defined, print it out."""
value = os.getenv(key)
if value is not None:
print(key, "=", repr(value), file=out)
def print_proxy_info (out=stderr):
def print_proxy_info(out=stderr):
"""Print proxy info."""
for key in ("http_proxy", "ftp_proxy", "no_proxy"):
print_env_info(key, out=out)
def print_locale_info (out=stderr):
def print_locale_info(out=stderr):
"""Print locale info."""
for key in ("LANGUAGE", "LC_ALL", "LC_CTYPE", "LANG"):
print_env_info(key, out=out)
@ -131,7 +131,7 @@ PYTHON_ENV_VARS = (
'PYTHONWARNINGS',
'PYTHONHASHSEED',
)
def print_app_info (out=stderr):
def print_app_info(out=stderr):
"""Print system and application info (output defaults to stderr)."""
print(_("System info:"), file=out)
print(configuration.App, file=out)
@ -146,7 +146,7 @@ def print_app_info (out=stderr):
print(_("sys.argv:"), sys.argv, file=out)
def print_version (out=stdout):
def print_version(out=stdout):
"""Print the program version (output defaults to stdout)."""
print(configuration.App, _("released"),
configuration.ReleaseDate, file=out)

View file

@ -19,14 +19,14 @@ from . import task
from .. import log, LOG_CHECK, strformat
class Interrupt (task.CheckedTask):
class Interrupt(task.CheckedTask):
"""Thread that raises KeyboardInterrupt after a specified duration.
This gives us a portable SIGALRM implementation.
The duration is checked every 5 seconds.
"""
WaitSeconds = 5
def __init__ (self, duration):
def __init__(self, duration):
"""Initialize the task.
@param duration: raise KeyboardInterrupt after given number of seconds
@ptype duration: int
@ -34,7 +34,7 @@ class Interrupt (task.CheckedTask):
super(Interrupt, self).__init__()
self.duration = duration
def run_checked (self):
def run_checked(self):
"""Wait and raise KeyboardInterrupt after."""
self.start_time = time.time()
self.setName("Interrupt")

View file

@ -24,28 +24,28 @@ _lock = threading.Lock()
class Logger:
"""Thread safe multi-logger class used by aggregator instances."""
def __init__ (self, config):
def __init__(self, config):
"""Initialize basic logging variables."""
self.loggers = [config['logger']]
self.loggers.extend(config['fileoutput'])
self.verbose = config["verbose"]
self.warnings = config["warnings"]
def start_log_output (self):
def start_log_output(self):
"""
Start output of all configured loggers.
"""
for logger in self.loggers:
logger.start_output()
def end_log_output (self, **kwargs):
def end_log_output(self, **kwargs):
"""
End output of all configured loggers.
"""
for logger in self.loggers:
logger.end_output(**kwargs)
def do_print (self, url_data):
def do_print(self, url_data):
"""Determine if URL entry should be logged or not."""
if self.verbose:
return True
@ -54,7 +54,7 @@ class Logger:
return not url_data.valid
@synchronized(_lock)
def log_url (self, url_data):
def log_url(self, url_data):
"""Send new url to all configured loggers."""
self.check_active_loggers()
do_print = self.do_print(url_data)
@ -64,7 +64,7 @@ class Logger:
log.log_filter_url(url_data, do_print)
@synchronized(_lock)
def log_internal_error (self):
def log_internal_error(self):
"""Document that an internal error occurred."""
for logger in self.loggers:
logger.log_internal_error()

View file

@ -18,10 +18,10 @@ import time
from . import task
class Status (task.LoggedCheckedTask):
class Status(task.LoggedCheckedTask):
"""Thread that gathers and logs the status periodically."""
def __init__ (self, aggregator, wait_seconds):
def __init__(self, aggregator, wait_seconds):
"""Initialize the status logger task.
@param urlqueue: the URL queue
@ptype urlqueue: Urlqueue
@ -36,7 +36,7 @@ class Status (task.LoggedCheckedTask):
self.wait_seconds = wait_seconds
assert self.wait_seconds >= 1
def run_checked (self):
def run_checked(self):
"""Print periodic status messages."""
self.start_time = time.time()
self.setName("Status")
@ -49,7 +49,7 @@ class Status (task.LoggedCheckedTask):
wait_seconds = self.wait_seconds
first_wait = False
def log_status (self):
def log_status(self):
"""Log a status message."""
duration = time.time() - self.start_time
checked, in_progress, queue = self.aggregator.urlqueue.status()

View file

@ -20,10 +20,10 @@ from .. import threader
from . import console
class CheckedTask (threader.StoppableThread):
class CheckedTask(threader.StoppableThread):
"""Stoppable URL check task, handling error conditions while running."""
def run (self):
def run(self):
"""Handle keyboard interrupt and other errors."""
try:
self.run_checked()
@ -33,25 +33,25 @@ class CheckedTask (threader.StoppableThread):
self.internal_error()
@notimplemented
def run_checked (self):
def run_checked(self):
"""Overload in subclass."""
pass
@notimplemented
def internal_error (self):
def internal_error(self):
"""Overload in subclass."""
pass
class LoggedCheckedTask (CheckedTask):
class LoggedCheckedTask(CheckedTask):
"""URL check task with a logger instance and internal error handling."""
def __init__ (self, logger):
def __init__(self, logger):
"""Initialize super instance and store given logger."""
super(CheckedTask, self).__init__()
self.logger = logger
def internal_error (self):
def internal_error(self):
"""Log an internal error on console and the logger."""
console.internal_error()
self.logger.log_internal_error()

View file

@ -20,59 +20,59 @@ Dummy objects.
class Dummy:
"""A dummy object ignores all access to it. Useful for testing."""
def __init__ (self, *args, **kwargs):
def __init__(self, *args, **kwargs):
"""Return None"""
pass
def __call__ (self, *args, **kwargs):
def __call__(self, *args, **kwargs):
"""Return self."""
return self
def __getattr__ (self, name):
def __getattr__(self, name):
"""Return self."""
return self
def __setattr__ (self, name, value):
def __setattr__(self, name, value):
"""Return None"""
pass
def __delattr__ (self, name):
def __delattr__(self, name):
"""Return None"""
pass
def __str__ (self):
def __str__(self):
"""Return 'dummy'"""
return "dummy"
def __repr__ (self):
def __repr__(self):
"""Return '<dummy>'"""
return "<dummy>"
def __unicode__ (self):
def __unicode__(self):
"""Return 'dummy'"""
return "dummy"
def __len__ (self):
def __len__(self):
"""Return zero"""
return 0
def __getitem__ (self, key):
def __getitem__(self, key):
"""Return self"""
return self
def __setitem__ (self, key, value):
def __setitem__(self, key, value):
"""Return None"""
pass
def __delitem__ (self, key):
def __delitem__(self, key):
"""Return None"""
pass
def __contains__ (self, key):
def __contains__(self, key):
"""Return False"""
return False
def dummy (*args, **kwargs):
def dummy(*args, **kwargs):
"""Ignore any positional or keyword arguments, return None."""
pass
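
The Dummy class above swallows every attribute access, call and item access, which makes it a convenient stand-in object in tests. A tiny illustrative sketch, with the import path assumed:

    from linkcheck.dummy import Dummy  # assumed path

    fake = Dummy()
    fake.log_url("ignored")     # attribute access and calls just return the Dummy
    fake["anything"] = 42       # item assignment is silently ignored
    assert len(fake) == 0
    assert "key" not in fake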

View file

@ -28,7 +28,7 @@ from functools import lru_cache
from builtins import str as str_text
def has_module (name, without_error=True):
def has_module(name, without_error=True):
"""Test if given module can be imported.
@param without_error: True if module must not throw any errors when importing
@return: flag if import is successful
@ -47,14 +47,14 @@ def has_module (name, without_error=True):
class GlobDirectoryWalker:
"""A forward iterator that traverses a directory tree."""
def __init__ (self, directory, pattern="*"):
def __init__(self, directory, pattern="*"):
"""Set start directory and pattern matcher."""
self.stack = [directory]
self.pattern = pattern
self.files = []
self.index = 0
def __getitem__ (self, index):
def __getitem__(self, index):
"""Search for next filename."""
while True:
try:
@ -81,22 +81,22 @@ rglob = GlobDirectoryWalker
class Buffer:
"""Holds buffered data"""
def __init__ (self, empty=''):
def __init__(self, empty=''):
"""Initialize buffer."""
self.empty = self.buf = empty
self.tmpbuf = []
self.pos = 0
def __len__ (self):
def __len__(self):
"""Buffer length."""
return self.pos
def write (self, data):
def write(self, data):
"""Write data to buffer."""
self.tmpbuf.append(data)
self.pos += len(data)
def flush (self, overlap=0):
def flush(self, overlap=0):
"""Flush buffered data and return it."""
self.buf += self.empty.join(self.tmpbuf)
self.tmpbuf = []
@ -109,7 +109,7 @@ class Buffer:
return data
def get_mtime (filename):
def get_mtime(filename):
"""Return modification time of filename or zero on errors."""
try:
return os.path.getmtime(filename)
@ -117,7 +117,7 @@ def get_mtime (filename):
return 0
def get_size (filename):
def get_size(filename):
"""Return file size in Bytes, or -1 on error."""
try:
return os.path.getsize(filename)
@ -135,7 +135,7 @@ elif "G_BROKEN_FILENAMES" in os.environ:
else:
FSCODING = "utf-8"
def path_safe (path):
def path_safe(path):
"""Ensure path string is compatible with the platform file system encoding."""
if isinstance(path, str_text) and not os.path.supports_unicode_filenames:
path = path.encode(FSCODING, "replace").decode(FSCODING)
@ -144,7 +144,7 @@ def path_safe (path):
# cache for modified check {absolute filename -> mtime}
_mtime_cache = {}
def has_changed (filename):
def has_changed(filename):
"""Check if filename has changed since the last check. If this
is the first check, assume the file is changed."""
key = os.path.abspath(filename)
@ -155,14 +155,14 @@ def has_changed (filename):
return mtime > _mtime_cache[key]
def get_temp_file (mode='r', **kwargs):
def get_temp_file(mode='r', **kwargs):
"""Return tuple (open file object, filename) pointing to a temporary
file."""
fd, filename = tempfile.mkstemp(**kwargs)
return os.fdopen(fd, mode), filename
def is_tty (fp):
def is_tty(fp):
"""Check if is a file object pointing to a TTY."""
return (hasattr(fp, "isatty") and fp.isatty())
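
Buffer collects chunks in a temporary list and joins them on flush(), and has_changed() keeps a module-level mtime cache so repeated calls only report files modified since the previous check. A short sketch, assuming the module is importable as linkcheck.fileutil:

    from linkcheck.fileutil import Buffer, has_changed  # assumed path

    buf = Buffer()
    buf.write("hello ")
    buf.write("world")
    print(len(buf))       # number of buffered characters
    print(buf.flush())    # joins and returns the buffered data

    print(has_changed("/etc/hosts"))  # first check of a file reports a change
    print(has_changed("/etc/hosts"))  # later checks compare modification times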

View file

@ -21,12 +21,12 @@ See also http://cr.yp.to/ftpparse.html
months = ("jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep",
"oct", "nov", "dec")
def ismonth (txt):
def ismonth(txt):
"""Check if given text is a month name."""
return txt.lower() in months
def ftpparse (line):
def ftpparse(line):
"""Parse a FTP list line into a dictionary with attributes:
name - name of file (string)
trycwd - False if cwd is definitely pointless, True otherwise

View file

@ -91,13 +91,13 @@ swf_url_re = re.compile(b"(?i)%s" % urlutil.safe_url_pattern.encode('ascii'))
c_comment_re = re.compile(r"/\*.*?\*/", re.DOTALL)
def strip_c_comments (text):
def strip_c_comments(text):
"""Remove C/CSS-style comments from text. Note that this method also
deliberately removes comments inside of strings."""
return c_comment_re.sub('', text)
def is_meta_url (attr, attrs):
def is_meta_url(attr, attrs):
"""Check if the meta attributes contain a URL."""
res = False
if attr == "content":
@ -123,7 +123,7 @@ class LinkFinder:
"""Find HTML links, and apply them to the callback function with the
format (url, lineno, column, name, codebase)."""
def __init__ (self, callback, tags):
def __init__(self, callback, tags):
"""Store content in buffer and initialize URL list."""
self.callback = callback
# set universal tag attributes using tagname None
@ -135,7 +135,7 @@ class LinkFinder:
self.tags[tag].update(self.universal_attrs)
self.base_ref = ''
def html_element (self, tag, attrs, element_text, lineno, column):
def html_element(self, tag, attrs, element_text, lineno, column):
"""Search for links and store found URLs in a list."""
log.debug(LOG_CHECK, "LinkFinder tag %s attrs %s", tag, attrs)
log.debug(LOG_CHECK, "line %d col %d", lineno, column)
@ -166,7 +166,7 @@ class LinkFinder:
self.parse_tag(tag, attr, value, name, base, lineno, column)
log.debug(LOG_CHECK, "LinkFinder finished tag %s", tag)
def get_link_name (self, tag, attrs, attr, name=None):
def get_link_name(self, tag, attrs, attr, name=None):
"""Parse attrs for link name. Return name of link."""
if tag == 'a' and attr == 'href':
if not name:
@ -179,7 +179,7 @@ class LinkFinder:
name = ""
return name
def parse_tag (self, tag, attr, value, name, base, lineno, column):
def parse_tag(self, tag, attr, value, name, base, lineno, column):
"""Add given url data to url list."""
assert isinstance(tag, str_text), repr(tag)
assert isinstance(attr, str_text), repr(attr)
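
strip_c_comments() above is a thin wrapper around the c_comment_re regex, and LinkFinder passes every URL it finds to the callback as (url, lineno, column, name, codebase). A minimal sketch of the comment stripping; the import location is a guess since the file name is not visible here:

    from linkcheck.linkparse import strip_c_comments  # assumed location

    css = "a { color: red; } /* old rule: a { color: blue; } */"
    print(strip_c_comments(css))   # comments (even inside strings) are removed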

View file

@ -17,7 +17,7 @@ import base64
from datetime import datetime
def encode_base64 (s):
def encode_base64(s):
"""Encode given string in base64, excluding trailing newlines."""
return base64.b64encode(s)
@ -68,7 +68,7 @@ def asn1_generaltime_to_seconds(timestr):
pass
return res
def has_header_value (headers, name, value):
def has_header_value(headers, name, value):
"""
Look in headers for a specific header name and value.
Both name and value are case insensitive.
@ -84,7 +84,7 @@ def has_header_value (headers, name, value):
return False
def get_content_type (headers):
def get_content_type(headers):
"""
Get the MIME type from the Content-Type header value, or
'application/octet-stream' if not found.

View file

@ -30,32 +30,32 @@ default_language = default_encoding = None
default_directory = None
default_domain = None
def install_builtin (translator, do_unicode):
def install_builtin(translator, do_unicode):
"""Install _() and _n() gettext methods into default namespace."""
import builtins
builtins.__dict__['_'] = translator.gettext
# also install ngettext
builtins.__dict__['_n'] = translator.ngettext
class Translator (gettext.GNUTranslations):
class Translator(gettext.GNUTranslations):
"""A translation class always installing its gettext methods into the
default namespace."""
def install (self, do_unicode):
def install(self, do_unicode):
"""Install gettext methods into the default namespace."""
install_builtin(self, do_unicode)
class NullTranslator (gettext.NullTranslations):
class NullTranslator(gettext.NullTranslations):
"""A dummy translation class always installing its gettext methods into
the default namespace."""
def install (self, do_unicode):
def install(self, do_unicode):
"""Install gettext methods into the default namespace."""
install_builtin(self, do_unicode)
def init (domain, directory, loc=None):
def init(domain, directory, loc=None):
"""Initialize this gettext i18n module. Searches for supported languages
and installs the gettext translator class."""
global default_language, default_encoding, default_domain, default_directory
@ -90,7 +90,7 @@ def install_language(language):
translator.install(do_unicode)
def get_translator (domain, directory, languages=None,
def get_translator(domain, directory, languages=None,
translatorklass=Translator, fallback=False,
fallbackklass=NullTranslator):
"""Search the appropriate GNUTranslations class."""
@ -101,14 +101,14 @@ def get_translator (domain, directory, languages=None,
return translator
def get_lang (lang):
def get_lang(lang):
"""Return lang if it is supported, or the default language."""
if lang in supported_languages:
return lang
return default_language
def get_headers_lang (headers):
def get_headers_lang(headers):
"""Return preferred supported language in given HTTP headers."""
if 'Accept-Language' not in headers:
return default_language
@ -132,7 +132,7 @@ def get_headers_lang (headers):
return default_language
def get_locale ():
def get_locale():
"""Search the default platform locale and norm it.
@returns (locale, encoding)
@rtype (string, string)"""
@ -150,7 +150,7 @@ def get_locale ():
return (loc, encoding)
def norm_locale (loc):
def norm_locale(loc):
"""Normalize a locale."""
loc = locale.normalize(loc)
# split up the locale into its base components
@ -175,17 +175,17 @@ lang_transis = {
'en': {'de': 'Englisch'},
}
def lang_name (lang):
def lang_name(lang):
"""Return full name of given language."""
return lang_names[lang]
def lang_trans (lang, curlang):
def lang_trans(lang, curlang):
"""Return translated full name of given language."""
return lang_transis[lang][curlang]
def get_encoded_writer (out=sys.stdout, encoding=None, errors='replace'):
def get_encoded_writer(out=sys.stdout, encoding=None, errors='replace'):
"""Get wrapped output writer with given encoding and error handling."""
if encoding is None:
encoding = default_encoding

View file

@ -81,7 +81,7 @@ def get_response_headers():
]
def formvalue (form, key):
def formvalue(form, key):
"""Get value with given key from WSGI form."""
field = form.get(key)
if isinstance(field, list):
@ -99,7 +99,7 @@ class ThreadsafeIO:
self.closed = False
@synchronized(_lock)
def write (self, data):
def write(self, data):
"""Write given unicode data to buffer."""
assert isinstance(data, str_text)
if self.closed:
@ -108,14 +108,14 @@ class ThreadsafeIO:
self.buf.append(data)
@synchronized(_lock)
def get_data (self):
def get_data(self):
"""Get bufferd unicode data."""
data = "".join(self.buf)
self.buf = []
return data
@synchronized(_lock)
def close (self):
def close(self):
"""Reset buffer and close this I/O object."""
self.buf = []
self.closed = True
@ -126,7 +126,7 @@ def encode(s):
return s.encode(HTML_ENCODING, 'ignore')
def checklink (form=None, env=os.environ):
def checklink(form=None, env=os.environ):
"""Validates the CGI form and checks the given links."""
if form is None:
form = {}
@ -147,7 +147,7 @@ def checklink (form=None, env=os.environ):
out.close()
def start_check (aggregate, out):
def start_check(aggregate, out):
"""Start checking in background and write encoded output to out."""
# check in background
t = threading.Thread(target=director.check_urls, args=(aggregate,))
@ -183,12 +183,12 @@ def get_configuration(form, out):
return config
def get_host_name (form):
def get_host_name(form):
"""Return host name of given URL."""
return urllib.parse.urlparse(formvalue(form, "url"))[1]
def checkform (form, env):
def checkform(form, env):
"""Check form data. throw exception on error
Be sure to NOT print out any user-given data as HTML code, so use
only plain strings as exception text."""
@ -227,13 +227,13 @@ def checkform (form, env):
raise LCFormError(_("invalid %s option %r") % (option, value))
def log (env, msg):
def log(env, msg):
"""Log message to WSGI error output."""
logfile = env['wsgi.errors']
logfile.write("%s\n" % msg)
def dump (env, form):
def dump(env, form):
"""Log environment and form."""
for var, value in env.items():
log(env, var+"="+value)
@ -241,7 +241,7 @@ def dump (env, form):
log(env, str(formvalue(form, key)))
def format_error (why):
def format_error(why):
"""Format standard error page.
@param why: error message
@ptype why: unicode

View file

@ -14,7 +14,7 @@ import imp
from .fileutil import is_writable_by_others
def is_frozen ():
def is_frozen():
"""Return True if running inside a py2exe- or py2app-generated
executable."""
return hasattr(sys, "frozen")

View file

@ -19,7 +19,7 @@ Locking utility class.
import threading
from . import log, LOG_THREAD
def get_lock (name, debug=False):
def get_lock(name, debug=False):
"""Get a new lock.
@param debug: if True, acquire() and release() will have debug messages
@ptype debug: boolean, default is False
@ -36,19 +36,19 @@ def get_lock (name, debug=False):
class DebugLock:
"""Debugging lock class."""
def __init__ (self, lock, name):
def __init__(self, lock, name):
"""Store lock and name parameters."""
self.lock = lock
self.name = name
def acquire (self, blocking=1):
def acquire(self, blocking=1):
"""Acquire lock."""
threadname = threading.currentThread().getName()
log.debug(LOG_THREAD, "Acquire %s for %s", self.name, threadname)
self.lock.acquire(blocking)
log.debug(LOG_THREAD, "...acquired %s for %s", self.name, threadname)
def release (self):
def release(self):
"""Release lock."""
threadname = threading.currentThread().getName()
log.debug(LOG_THREAD, "Release %s for %s", self.name, threadname)

View file

@ -29,7 +29,7 @@ import traceback
#gc.set_debug(gc.DEBUG_LEAK)
PRINT_LOCALVARS = False
def _stack_format (stack):
def _stack_format(stack):
"""Format a stack trace to a message.
@return: formatted stack message
@ -54,7 +54,7 @@ def _stack_format (stack):
return s.getvalue()
def _log (fun, msg, args, **kwargs):
def _log(fun, msg, args, **kwargs):
"""Log a message with given function. Optional the following keyword
arguments are supported:
traceback(bool) - if True print traceback of current function
@ -70,7 +70,7 @@ def _log (fun, msg, args, **kwargs):
fun(traceback.format_exc())
def debug (logname, msg, *args, **kwargs):
def debug(logname, msg, *args, **kwargs):
"""Log a debug message.
return: None
@ -80,7 +80,7 @@ def debug (logname, msg, *args, **kwargs):
_log(log.debug, msg, args, **kwargs)
def info (logname, msg, *args, **kwargs):
def info(logname, msg, *args, **kwargs):
"""Log an informational message.
return: None
@ -90,7 +90,7 @@ def info (logname, msg, *args, **kwargs):
_log(log.info, msg, args, **kwargs)
def warn (logname, msg, *args, **kwargs):
def warn(logname, msg, *args, **kwargs):
"""Log a warning.
return: None
@ -100,7 +100,7 @@ def warn (logname, msg, *args, **kwargs):
_log(log.warning, msg, args, **kwargs)
def error (logname, msg, *args, **kwargs):
def error(logname, msg, *args, **kwargs):
"""Log an error.
return: None
@ -110,7 +110,7 @@ def error (logname, msg, *args, **kwargs):
_log(log.error, msg, args, **kwargs)
def critical (logname, msg, *args, **kwargs):
def critical(logname, msg, *args, **kwargs):
"""Log a critical error.
return: None
@ -120,7 +120,7 @@ def critical (logname, msg, *args, **kwargs):
_log(log.critical, msg, args, **kwargs)
def exception (logname, msg, *args, **kwargs):
def exception(logname, msg, *args, **kwargs):
"""Log an exception.
return: None
@ -130,11 +130,11 @@ def exception (logname, msg, *args, **kwargs):
_log(log.exception, msg, args, **kwargs)
def is_debug (logname):
def is_debug(logname):
"""See if logger is on debug level."""
return logging.getLogger(logname).isEnabledFor(logging.DEBUG)
def shutdown ():
def shutdown():
"""Flush and close all log handlers."""
logging.shutdown()
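
These helpers are thin wrappers around the stdlib logging module: each takes a logger name plus a printf-style message, and _log() appends a traceback when traceback=True is passed. Illustrative usage, reusing the LOG_CHECK logger name that appears elsewhere in this diff:

    from linkcheck import log, LOG_CHECK

    log.debug(LOG_CHECK, "checking %s", "http://example.com/")
    log.warn(LOG_CHECK, "slow response from %s", "http://example.com/")
    try:
        1 / 0
    except ZeroDivisionError:
        # traceback=True makes _log() append the current traceback
        log.error(LOG_CHECK, "unexpected failure", traceback=True)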

View file

@ -63,7 +63,7 @@ def init_log_config(handler=None):
add_loghandler(handler)
def add_loghandler (handler):
def add_loghandler(handler):
"""Add log handler to root logger and LOG_ROOT and set formatting."""
format = "%(levelname)s %(name)s %(asctime)s %(threadName)s %(message)s"
handler.setFormatter(logging.Formatter(format))
@ -71,7 +71,7 @@ def add_loghandler (handler):
logging.getLogger().addHandler(handler)
def remove_loghandler (handler):
def remove_loghandler(handler):
"""Remove log handler from root logger and LOG_ROOT."""
logging.getLogger(LOG_ROOT).removeHandler(handler)
logging.getLogger().removeHandler(handler)

View file

@ -64,11 +64,11 @@ class LogStatistics:
- URL lengths
"""
def __init__ (self):
def __init__(self):
"""Initialize log statistics."""
self.reset()
def reset (self):
def reset(self):
"""Reset all log statistics to default values."""
# number of logged URLs
self.number = 0
@ -92,7 +92,7 @@ class LogStatistics:
# overall downloaded bytes
self.downloaded_bytes = None
def log_url (self, url_data, do_print):
def log_url(self, url_data, do_print):
"""Log URL statistics."""
self.number += 1
if not url_data.valid:
@ -124,12 +124,12 @@ class LogStatistics:
# calculate running average
self.avg_url_length += (l - self.avg_url_length) / self.avg_number
def log_internal_error (self):
def log_internal_error(self):
"""Increase internal error count."""
self.internal_errors += 1
class _Logger (abc.ABC):
class _Logger(abc.ABC):
"""
Base class for logging of checked urls. It defines the public API
(see below) and offers basic functionality for all loggers.
@ -164,7 +164,7 @@ class _Logger (abc.ABC):
# Default log configuration
LoggerArgs = {}
def __init__ (self, **args):
def __init__(self, **args):
"""
Initialize a logger, looking for part restrictions in kwargs.
"""
@ -198,18 +198,18 @@ class _Logger (abc.ABC):
args.update(kwargs)
return args
def get_charset_encoding (self):
def get_charset_encoding(self):
"""Translate the output encoding to a charset encoding name."""
if self.output_encoding == "utf-8-sig":
return "utf-8"
return self.output_encoding
def encode (self, s):
def encode(self, s):
"""Encode string with output encoding."""
assert isinstance(s, str_text)
return s.encode(self.output_encoding, self.codec_errors)
def init_fileoutput (self, args):
def init_fileoutput(self, args):
"""
Initialize self.fd file descriptor from args. For file output
(used when the fileoutput arg is given), the self.fd
@ -226,7 +226,7 @@ class _Logger (abc.ABC):
else:
self.fd = self.create_fd()
def start_fileoutput (self):
def start_fileoutput(self):
"""Start output to configured file."""
path = os.path.dirname(self.filename)
try:
@ -243,7 +243,7 @@ class _Logger (abc.ABC):
self.is_active = False
self.filename = None
def create_fd (self):
def create_fd(self):
"""Create open file descriptor."""
if self.filename is None:
return i18n.get_encoded_writer(encoding=self.output_encoding,
@ -251,7 +251,7 @@ class _Logger (abc.ABC):
return codecs.open(self.filename, "wb", self.output_encoding,
self.codec_errors)
def close_fileoutput (self):
def close_fileoutput(self):
"""
Flush and close the file output denoted by self.fd.
"""
@ -269,7 +269,7 @@ class _Logger (abc.ABC):
pass
self.fd = None
def check_date (self):
def check_date(self):
"""
Check for special dates.
"""
@ -278,14 +278,14 @@ class _Logger (abc.ABC):
msg = _("Happy birthday for LinkChecker, I'm %d years old today!")
self.comment(msg % (now.year - 2000))
def comment (self, s, **args):
def comment(self, s, **args):
"""
Write a comment and a newline. This method just prints
the given string.
"""
self.writeln(s=s, **args)
def wrap (self, lines, width):
def wrap(self, lines, width):
"""
Return wrapped version of given lines.
"""
@ -297,7 +297,7 @@ class _Logger (abc.ABC):
break_on_hyphens=False)
return strformat.wrap(text, width, **kwargs).lstrip()
def write (self, s, **args):
def write(self, s, **args):
"""Write string to output descriptor. Strips control characters
from string before writing.
"""
@ -318,13 +318,13 @@ class _Logger (abc.ABC):
self.fd = dummy.Dummy()
self.is_active = False
def writeln (self, s="", **args):
def writeln(self, s="", **args):
"""
Write string to output descriptor plus a newline.
"""
self.write("%s%s" % (s, os.linesep), **args)
def has_part (self, name):
def has_part(self, name):
"""
See if given part name will be logged.
"""
@ -333,19 +333,19 @@ class _Logger (abc.ABC):
return True
return name in self.logparts
def part (self, name):
def part(self, name):
"""
Return translated part name.
"""
return _(Fields.get(name, ""))
def spaces (self, name):
def spaces(self, name):
"""
Return indent of spaces for given part name.
"""
return self.logspaces[name]
def start_output (self):
def start_output(self):
"""
Start log output.
"""
@ -363,7 +363,7 @@ class _Logger (abc.ABC):
self.stats.reset()
self.starttime = time.time()
def log_filter_url (self, url_data, do_print):
def log_filter_url(self, url_data, do_print):
"""
Log a new url with this logger if do_print is True. Else
only update accounting data.
@ -372,7 +372,7 @@ class _Logger (abc.ABC):
if do_print:
self.log_url(url_data)
def write_intro (self):
def write_intro(self):
"""Write intro comments."""
self.comment(_("created by %(app)s at %(time)s") %
{"app": configuration.AppName,
@ -383,7 +383,7 @@ class _Logger (abc.ABC):
{'url': configuration.SupportUrl})
self.check_date()
def write_outro (self):
def write_outro(self):
"""Write outro comments."""
self.stoptime = time.time()
duration = self.stoptime - self.starttime
@ -392,32 +392,32 @@ class _Logger (abc.ABC):
"duration": strformat.strduration_long(duration)})
@abc.abstractmethod
def log_url (self, url_data):
def log_url(self, url_data):
"""
Log a new url with this logger.
"""
pass
@abc.abstractmethod
def end_output (self, **kwargs):
def end_output(self, **kwargs):
"""
End of output, used for cleanup (eg output buffer flushing).
"""
pass
def __str__ (self):
def __str__(self):
"""
Return class name.
"""
return self.__class__.__name__
def __repr__ (self):
def __repr__(self):
"""
Return class name.
"""
return repr(self.__class__.__name__)
def flush (self):
def flush(self):
"""
If the logger has internal buffers, flush them.
Ignore flush I/O errors since we are not responsible for proper
@ -429,7 +429,7 @@ class _Logger (abc.ABC):
except (IOError, AttributeError):
pass
def log_internal_error (self):
def log_internal_error(self):
"""Indicate that an internal error occurred in the program."""
log.warn(LOG_CHECK, "internal error occurred")
self.stats.log_internal_error()

View file

@ -23,7 +23,7 @@ from linkcheck.configuration import get_user_data
from . import _Logger
class BlacklistLogger (_Logger):
class BlacklistLogger(_Logger):
"""
Updates a blacklist of wrong links. If a link on the blacklist
is working (again), it is removed from the list. So after n days
@ -36,7 +36,7 @@ class BlacklistLogger (_Logger):
"filename": os.path.join(get_user_data(), "blacklist"),
}
def __init__ (self, **kwargs):
def __init__(self, **kwargs):
"""Intialize with old blacklist data (if found, else not)."""
args = self.get_args(kwargs)
super(BlacklistLogger, self).__init__(**args)
@ -45,13 +45,13 @@ class BlacklistLogger (_Logger):
if self.filename is not None and os.path.exists(self.filename):
self.read_blacklist()
def comment (self, s, **args):
def comment(self, s, **args):
"""
Write nothing.
"""
pass
def log_url (self, url_data):
def log_url(self, url_data):
"""
Put invalid url in blacklist, delete valid url from blacklist.
"""
@ -66,13 +66,13 @@ class BlacklistLogger (_Logger):
if not url_data.valid:
self.blacklist[key] = 1
def end_output (self, **kwargs):
def end_output(self, **kwargs):
"""
Write blacklist file.
"""
self.write_blacklist()
def read_blacklist (self):
def read_blacklist(self):
"""
Read a previously stored blacklist from file fd.
"""
@ -85,7 +85,7 @@ class BlacklistLogger (_Logger):
value, key = line.split(None, 1)
self.blacklist[key] = int(value)
def write_blacklist (self):
def write_blacklist(self):
"""
Write the blacklist.
"""

View file

@ -30,7 +30,7 @@ Columns = (
)
class CSVLogger (_Logger):
class CSVLogger(_Logger):
"""
CSV output, consisting of one line per entry. Entries are
separated by a separator (a semicolon by default).
@ -45,7 +45,7 @@ class CSVLogger (_Logger):
"dialect": "excel",
}
def __init__ (self, **kwargs):
def __init__(self, **kwargs):
"""Store default separator and (os dependent) line terminator."""
args = self.get_args(kwargs)
super(CSVLogger, self).__init__(**args)
@ -55,11 +55,11 @@ class CSVLogger (_Logger):
self.dialect = args['dialect']
self.linesep = os.linesep
def comment (self, s, **args):
def comment(self, s, **args):
"""Write CSV comment."""
self.writeln(s="# %s" % s, **args)
def start_output (self):
def start_output(self):
"""Write checking start info as csv comment."""
super(CSVLogger, self).start_output()
row = []
@ -79,7 +79,7 @@ class CSVLogger (_Logger):
if row:
self.writerow(row)
def log_url (self, url_data):
def log_url(self, url_data):
"""Write csv formatted url check info."""
row = []
if self.has_part("urlname"):
@ -119,7 +119,7 @@ class CSVLogger (_Logger):
self.writerow(map(strformat.unicode_safe, row))
self.flush()
def writerow (self, row):
def writerow(self, row):
"""Write one row in CSV format."""
self.writer.writerow(row)
# Fetch UTF-8 output from the queue ...
@ -134,7 +134,7 @@ class CSVLogger (_Logger):
self.queue.seek(0)
self.queue.truncate(0)
def end_output (self, **kwargs):
def end_output(self, **kwargs):
"""Write end of checking info as csv comment."""
if self.has_part("outro"):
self.write_outro()

View file

@ -21,7 +21,7 @@ from .. import strformat
from builtins import str as str_text
class CustomXMLLogger (xmllog._XMLLogger):
class CustomXMLLogger(xmllog._XMLLogger):
"""
XML custom output for easy post-processing.
"""
@ -32,7 +32,7 @@ class CustomXMLLogger (xmllog._XMLLogger):
"filename": "linkchecker-out.xml",
}
def start_output (self):
def start_output(self):
"""
Write start of checking info as xml comment.
"""
@ -42,7 +42,7 @@ class CustomXMLLogger (xmllog._XMLLogger):
self.xml_starttag('linkchecker', attrs)
self.flush()
def log_url (self, url_data):
def log_url(self, url_data):
"""
Log URL data in custom XML format.
"""
@ -95,7 +95,7 @@ class CustomXMLLogger (xmllog._XMLLogger):
self.xml_endtag('urldata')
self.flush()
def end_output (self, **kwargs):
def end_output(self, **kwargs):
"""
Write XML end tag.
"""

View file

@ -20,7 +20,7 @@ http://www.graphviz.org/doc/info/lang.html
from .graph import _GraphLogger
class DOTLogger (_GraphLogger):
class DOTLogger(_GraphLogger):
"""
Generates .dot sitemap graphs. Use graphviz to see the sitemap graph.
"""
@ -32,7 +32,7 @@ class DOTLogger (_GraphLogger):
"encoding": "ascii",
}
def start_output (self):
def start_output(self):
"""Write start of checking info as DOT comment."""
super(DOTLogger, self).start_output()
if self.has_part("intro"):
@ -44,12 +44,12 @@ class DOTLogger (_GraphLogger):
self.writeln(" ];")
self.flush()
def comment (self, s, **args):
def comment(self, s, **args):
"""Write DOT comment."""
self.write("// ")
self.writeln(s=s, **args)
def log_url (self, url_data):
def log_url(self, url_data):
"""Write one node."""
node = self.get_node(url_data)
if node is not None:
@ -66,7 +66,7 @@ class DOTLogger (_GraphLogger):
self.writeln(" extern=%d," % node["extern"])
self.writeln(" ];")
def write_edge (self, node):
def write_edge(self, node):
"""Write edge from parent to node."""
source = dotquote(self.nodes[node["parent_url"]]["label"])
target = dotquote(node["label"])
@ -76,11 +76,11 @@ class DOTLogger (_GraphLogger):
self.writeln(" valid=%d," % node["valid"])
self.writeln(" ];")
def end_graph (self):
def end_graph(self):
"""Write end of graph marker."""
self.writeln("}")
def dotquote (s):
def dotquote(s):
"""Quote string for usage in DOT output format."""
return s.replace('"', '\\"')

View file

@ -19,7 +19,7 @@ A gml logger.
from .graph import _GraphLogger
class GMLLogger (_GraphLogger):
class GMLLogger(_GraphLogger):
"""GML means Graph Modeling Language. Use a GML tool to see
the sitemap graph."""
@ -29,7 +29,7 @@ class GMLLogger (_GraphLogger):
"filename": "linkchecker-out.gml",
}
def start_output (self):
def start_output(self):
"""Write start of checking info as gml comment."""
super(GMLLogger, self).start_output()
if self.has_part("intro"):
@ -39,11 +39,11 @@ class GMLLogger (_GraphLogger):
self.writeln(" directed 1")
self.flush()
def comment (self, s, **args):
def comment(self, s, **args):
"""Write GML comment."""
self.writeln(s='comment "%s"' % s, **args)
def log_url (self, url_data):
def log_url(self, url_data):
"""Write one node."""
node = self.get_node(url_data)
if node:
@ -62,7 +62,7 @@ class GMLLogger (_GraphLogger):
self.writeln(" extern %d" % node["extern"])
self.writeln(" ]")
def write_edge (self, node):
def write_edge(self, node):
"""Write one edge."""
self.writeln(" edge [")
self.writeln(' label "%s"' % node["edge"])
@ -72,6 +72,6 @@ class GMLLogger (_GraphLogger):
self.writeln(" valid %d" % node["valid"])
self.writeln(" ]")
def end_graph (self):
def end_graph(self):
"""Write end of graph marker."""
self.writeln("]")

View file

@ -21,10 +21,10 @@ from ..decorators import notimplemented
import re
class _GraphLogger (_Logger):
class _GraphLogger(_Logger):
"""Provide base method to get node data."""
def __init__ (self, **kwargs):
def __init__(self, **kwargs):
"""Initialize graph node list and internal id counter."""
args = self.get_args(kwargs)
super(_GraphLogger, self).__init__(**args)
@ -41,7 +41,7 @@ class _GraphLogger (_Logger):
if url_data.valid:
self.log_url(url_data)
def get_node (self, url_data):
def get_node(self, url_data):
"""Return new node data or None if node already exists."""
if not url_data.url:
return None
@ -63,7 +63,7 @@ class _GraphLogger (_Logger):
self.nodeid += 1
return node
def write_edges (self):
def write_edges(self):
"""
Write all edges we can find in the graph in a brute-force manner.
"""
@ -73,16 +73,16 @@ class _GraphLogger (_Logger):
self.flush()
@notimplemented
def write_edge (self, node):
def write_edge(self, node):
"""Write edge data for one node and its parent."""
pass
@notimplemented
def end_graph (self):
def end_graph(self):
"""Write end-of-graph marker."""
pass
def end_output (self, **kwargs):
def end_output(self, **kwargs):
"""Write edges and end of checking info as gml comment."""
self.write_edges()
self.end_graph()
@ -93,7 +93,7 @@ class _GraphLogger (_Logger):
_disallowed = re.compile(r"[^a-zA-Z0-9 '#(){}\-\[\]\.,;:\!\?]+")
def quote (s):
def quote(s):
"""Replace disallowed characters in node or edge labels.
Also remove whitespace from beginning or end of label."""
return _disallowed.sub(" ", s).strip()

View file

@ -21,7 +21,7 @@ from .xmllog import _XMLLogger
from .graph import _GraphLogger
class GraphXMLLogger (_XMLLogger, _GraphLogger):
class GraphXMLLogger(_XMLLogger, _GraphLogger):
"""XML output mirroring the GML structure. Easy to parse with any XML
tool."""
@ -31,14 +31,14 @@ class GraphXMLLogger (_XMLLogger, _GraphLogger):
"filename": "linkchecker-out.gxml",
}
def __init__ (self, **kwargs):
def __init__(self, **kwargs):
"""Initialize graph node list and internal id counter."""
args = self.get_args(kwargs)
super(GraphXMLLogger, self).__init__(**args)
self.nodes = {}
self.nodeid = 0
def start_output (self):
def start_output(self):
"""Write start of checking info as xml comment."""
super(GraphXMLLogger, self).start_output()
self.xml_start_output()
@ -46,7 +46,7 @@ class GraphXMLLogger (_XMLLogger, _GraphLogger):
self.xml_starttag('graph', attrs={"isDirected": "true"})
self.flush()
def log_url (self, url_data):
def log_url(self, url_data):
"""Write one node and all possible edges."""
node = self.get_node(url_data)
if node:
@ -66,7 +66,7 @@ class GraphXMLLogger (_XMLLogger, _GraphLogger):
self.xml_endtag("data")
self.xml_endtag("node")
def write_edge (self, node):
def write_edge(self, node):
"""Write one edge."""
attrs = {
"source": "%d" % self.nodes[node["parent_url"]]["id"],
@ -80,7 +80,7 @@ class GraphXMLLogger (_XMLLogger, _GraphLogger):
self.xml_endtag("data")
self.xml_endtag("edge")
def end_output (self, **kwargs):
def end_output(self, **kwargs):
"""Finish graph output, and print end of checking info as xml
comment."""
self.xml_endtag("graph")

View file

@ -59,7 +59,7 @@ HTML_HEADER = """<!DOCTYPE HTML>
"""
class HtmlLogger (_Logger):
class HtmlLogger(_Logger):
"""Logger with HTML output."""
LoggerName = 'html'
@ -75,7 +75,7 @@ class HtmlLogger (_Logger):
'colorok': '#3ba557',
}
def __init__ (self, **kwargs):
def __init__(self, **kwargs):
"""Initialize default HTML color values."""
args = self.get_args(kwargs)
super(HtmlLogger, self).__init__(**args)
@ -88,17 +88,17 @@ class HtmlLogger (_Logger):
self.colorerror = args['colorerror']
self.colorok = args['colorok']
def part (self, name):
def part(self, name):
"""Return non-space-breakable part name."""
return super(HtmlLogger, self).part(name).replace(" ", "&nbsp;")
def comment (self, s, **args):
def comment(self, s, **args):
"""Write HTML comment."""
self.write("<!-- ")
self.write(s, **args)
self.write(" -->")
def start_output (self):
def start_output(self):
"""Write start of checking info."""
super(HtmlLogger, self).start_output()
header = {
@ -125,7 +125,7 @@ class HtmlLogger (_Logger):
self.check_date()
self.flush()
def log_url (self, url_data):
def log_url(self, url_data):
"""Write url checking info as HTML."""
self.write_table_start()
if self.has_part("url"):
@ -155,21 +155,21 @@ class HtmlLogger (_Logger):
self.write_table_end()
self.flush()
def write_table_start (self):
def write_table_start(self):
"""Start html table."""
self.writeln('<br/><br/><table>')
def write_table_end (self):
def write_table_end(self):
"""End html table."""
self.write('</table><br/>')
def write_id (self):
def write_id(self):
"""Write ID for current URL."""
self.writeln("<tr>")
self.writeln('<td>%s</td>' % self.part("id"))
self.write("<td>%d</td></tr>" % self.stats.number)
def write_url (self, url_data):
def write_url(self, url_data):
"""Write url_data.base_url."""
self.writeln("<tr>")
self.writeln('<td class="url">%s</td>' % self.part("url"))
@ -177,12 +177,12 @@ class HtmlLogger (_Logger):
self.write("`%s'" % html.escape(url_data.base_url))
self.writeln("</td></tr>")
def write_name (self, url_data):
def write_name(self, url_data):
"""Write url_data.name."""
args = (self.part("name"), html.escape(url_data.name))
self.writeln("<tr><td>%s</td><td>`%s'</td></tr>" % args)
def write_parent (self, url_data):
def write_parent(self, url_data):
"""Write url_data.parent_url."""
self.write("<tr><td>"+self.part("parenturl")+
'</td><td><a target="top" href="'+
@ -203,35 +203,35 @@ class HtmlLogger (_Logger):
self.write('(<a href="'+vcss+'">CSS</a>)')
self.writeln("</td></tr>")
def write_base (self, url_data):
def write_base(self, url_data):
"""Write url_data.base_ref."""
self.writeln("<tr><td>"+self.part("base")+"</td><td>"+
html.escape(url_data.base_ref)+"</td></tr>")
def write_real (self, url_data):
def write_real(self, url_data):
"""Write url_data.url."""
self.writeln("<tr><td>"+self.part("realurl")+"</td><td>"+
'<a target="top" href="'+url_data.url+
'">'+html.escape(url_data.url)+"</a></td></tr>")
def write_dltime (self, url_data):
def write_dltime(self, url_data):
"""Write url_data.dltime."""
self.writeln("<tr><td>"+self.part("dltime")+"</td><td>"+
(_("%.3f seconds") % url_data.dltime)+
"</td></tr>")
def write_size (self, url_data):
def write_size(self, url_data):
"""Write url_data.size."""
self.writeln("<tr><td>"+self.part("dlsize")+"</td><td>"+
strformat.strsize(url_data.size)+
"</td></tr>")
def write_checktime (self, url_data):
def write_checktime(self, url_data):
"""Write url_data.checktime."""
self.writeln("<tr><td>"+self.part("checktime")+"</td><td>"+
(_("%.3f seconds") % url_data.checktime)+"</td></tr>")
def write_info (self, url_data):
def write_info(self, url_data):
"""Write url_data.info."""
sep = "<br/>"+os.linesep
text = sep.join(html.escape(x) for x in url_data.info)
@ -244,7 +244,7 @@ class HtmlLogger (_Logger):
self.writeln('<tr><td valign="top">' + self.part("modified") +
"</td><td>"+text+"</td></tr>")
def write_warning (self, url_data):
def write_warning(self, url_data):
"""Write url_data.warnings."""
sep = "<br/>"+os.linesep
text = sep.join(html.escape(x[1]) for x in url_data.warnings)
@ -252,7 +252,7 @@ class HtmlLogger (_Logger):
'valign="top">' + self.part("warning") +
'</td><td class="warning">' + text + "</td></tr>")
def write_result (self, url_data):
def write_result(self, url_data):
"""Write url_data.result."""
if url_data.valid:
self.write('<tr><td class="valid">')
@ -268,7 +268,7 @@ class HtmlLogger (_Logger):
self.write(": "+html.escape(url_data.result))
self.writeln("</td></tr>")
def write_stats (self):
def write_stats(self):
"""Write check statistic infos."""
self.writeln('<br/><i>%s</i><br/>' % _("Statistics"))
if self.stats.number > 0:
@ -285,7 +285,7 @@ class HtmlLogger (_Logger):
self.writeln(_("No statistics available since no URLs were checked."))
self.writeln("<br/>")
def write_outro (self):
def write_outro(self):
"""Write end of check message."""
self.writeln("<br/>")
self.write(_("That's it.")+" ")
@ -326,7 +326,7 @@ class HtmlLogger (_Logger):
configuration.SupportUrl+"</a>.<br/>"))
self.writeln("</small></body></html>")
def end_output (self, **kwargs):
def end_output(self, **kwargs):
"""Write end of checking info as HTML."""
if self.has_part("stats"):
self.write_stats()

View file

@ -19,30 +19,30 @@ A dummy logger.
from . import _Logger
class NoneLogger (_Logger):
class NoneLogger(_Logger):
"""
Dummy logger printing nothing.
"""
LoggerName = 'none'
def comment (self, s, **args):
def comment(self, s, **args):
"""
Do nothing.
"""
pass
def start_output (self):
def start_output(self):
"""
Do nothing.
"""
pass
def log_url (self, url_data):
def log_url(self, url_data):
"""Do nothing."""
pass
def end_output (self, **kwargs):
def end_output(self, **kwargs):
"""
Do nothing.
"""

View file

@ -32,7 +32,7 @@ ChangeFreqs = (
HTTP_SCHEMES = ('http:', 'https:')
HTML_TYPES = ('text/html', "application/xhtml+xml")
class SitemapXmlLogger (xmllog._XMLLogger):
class SitemapXmlLogger(xmllog._XMLLogger):
"""Sitemap XML output according to http://www.sitemaps.org/protocol.html
"""
@ -43,7 +43,7 @@ class SitemapXmlLogger (xmllog._XMLLogger):
"encoding": "utf-8",
}
def __init__ (self, **kwargs):
def __init__(self, **kwargs):
"""Initialize graph node list and internal id counter."""
args = self.get_args(kwargs)
super(SitemapXmlLogger, self).__init__(**args)
@ -63,7 +63,7 @@ class SitemapXmlLogger (xmllog._XMLLogger):
if 'priority' in args:
self.priority = float(args['priority'])
def start_output (self):
def start_output(self):
"""Write start of checking info as xml comment."""
super(SitemapXmlLogger, self).start_output()
self.xml_start_output()
@ -101,7 +101,7 @@ class SitemapXmlLogger (xmllog._XMLLogger):
and url_data.content_type in HTML_TYPES):
self.log_url(url_data, priority=priority)
def log_url (self, url_data, priority=None):
def log_url(self, url_data, priority=None):
"""Log URL data in sitemap format."""
self.xml_starttag('url')
self.xml_tag('loc', url_data.url)
@ -112,7 +112,7 @@ class SitemapXmlLogger (xmllog._XMLLogger):
self.xml_endtag('url')
self.flush()
def end_output (self, **kwargs):
def end_output(self, **kwargs):
"""Write XML end tag."""
self.xml_endtag("urlset")
self.xml_end_output()

View file

@ -22,7 +22,7 @@ from . import _Logger
from .. import url as urlutil
def sqlify (s):
def sqlify(s):
"""
Escape special SQL chars and strings.
"""
@ -31,7 +31,7 @@ def sqlify (s):
return "'%s'" % s.replace("'", "''").replace(os.linesep, r"\n")
def intify (s):
def intify(s):
"""
Coerce a truth value to 0/1.
@ -45,7 +45,7 @@ def intify (s):
return 0
class SQLLogger (_Logger):
class SQLLogger(_Logger):
"""
SQL output, should work with any SQL database (not tested).
"""
@ -58,7 +58,7 @@ class SQLLogger (_Logger):
'dbname': 'linksdb',
}
def __init__ (self, **kwargs):
def __init__(self, **kwargs):
"""Initialize database access data."""
args = self.get_args(kwargs)
super(SQLLogger, self).__init__(**args)
@ -66,14 +66,14 @@ class SQLLogger (_Logger):
self.dbname = args['dbname']
self.separator = args['separator']
def comment (self, s, **args):
def comment(self, s, **args):
"""
Write SQL comment.
"""
self.write("-- ")
self.writeln(s=s, **args)
def start_output (self):
def start_output(self):
"""
Write start of checking info as sql comment.
"""
@ -83,7 +83,7 @@ class SQLLogger (_Logger):
self.writeln()
self.flush()
def log_url (self, url_data):
def log_url(self, url_data):
"""
Store url check info into the database.
"""
@ -130,7 +130,7 @@ class SQLLogger (_Logger):
})
self.flush()
def end_output (self, **kwargs):
def end_output(self, **kwargs):
"""
Write end of checking info as sql comment.
"""

View file

@ -22,7 +22,7 @@ from .. import ansicolor, strformat, configuration, i18n
from builtins import str as str_text
class TextLogger (_Logger):
class TextLogger(_Logger):
"""
A text logger, colorizing the output if possible.
@ -52,7 +52,7 @@ class TextLogger (_Logger):
'colorreset': "default",
}
def __init__ (self, **kwargs):
def __init__(self, **kwargs):
"""Initialize error counter and optional file output."""
args = self.get_args(kwargs)
super(TextLogger, self).__init__(**args)
@ -71,27 +71,27 @@ class TextLogger (_Logger):
self.colordlsize = args.get('colordlsize', 'default')
self.colorreset = args.get('colorreset', 'default')
def init_fileoutput (self, args):
def init_fileoutput(self, args):
"""Colorize file output if possible."""
super(TextLogger, self).init_fileoutput(args)
if self.fd is not None:
self.fd = ansicolor.Colorizer(self.fd)
def start_fileoutput (self):
def start_fileoutput(self):
"""Needed to make file descriptor color aware."""
init_color = self.fd is None
super(TextLogger, self).start_fileoutput()
if init_color:
self.fd = ansicolor.Colorizer(self.fd)
def start_output (self):
def start_output(self):
"""Write generic start checking info."""
super(TextLogger, self).start_output()
if self.has_part('intro'):
self.write_intro()
self.flush()
def write_intro (self):
def write_intro(self):
"""Log introduction text."""
self.writeln(configuration.AppInfo)
self.writeln(configuration.Freeware)
@ -104,7 +104,7 @@ class TextLogger (_Logger):
self.writeln(_("Start checking at %s") %
strformat.strtime(self.starttime))
def log_url (self, url_data):
def log_url(self, url_data):
"""Write url checking info."""
self.writeln()
if self.has_part('url'):
@ -133,24 +133,24 @@ class TextLogger (_Logger):
self.write_result(url_data)
self.flush()
def write_id (self):
def write_id(self):
"""Write unique ID of url_data."""
self.writeln()
self.write(self.part('id') + self.spaces('id'))
self.writeln("%d" % self.stats.number, color=self.colorinfo)
def write_url (self, url_data):
def write_url(self, url_data):
"""Write url_data.base_url."""
self.write(self.part('url') + self.spaces('url'))
txt = strformat.strline(url_data.base_url)
self.writeln(txt, color=self.colorurl)
def write_name (self, url_data):
def write_name(self, url_data):
"""Write url_data.name."""
self.write(self.part("name") + self.spaces("name"))
self.writeln(strformat.strline(url_data.name), color=self.colorname)
def write_parent (self, url_data):
def write_parent(self, url_data):
"""Write url_data.parent_url."""
self.write(self.part('parenturl') + self.spaces("parenturl"))
txt = url_data.parent_url
@ -162,35 +162,35 @@ class TextLogger (_Logger):
txt += _(", page %d") % url_data.page
self.writeln(txt, color=self.colorparent)
def write_base (self, url_data):
def write_base(self, url_data):
"""Write url_data.base_ref."""
self.write(self.part("base") + self.spaces("base"))
self.writeln(url_data.base_ref, color=self.colorbase)
def write_real (self, url_data):
def write_real(self, url_data):
"""Write url_data.url."""
self.write(self.part("realurl") + self.spaces("realurl"))
self.writeln(str_text(url_data.url), color=self.colorreal)
def write_dltime (self, url_data):
def write_dltime(self, url_data):
"""Write url_data.dltime."""
self.write(self.part("dltime") + self.spaces("dltime"))
self.writeln(_("%.3f seconds") % url_data.dltime,
color=self.colordltime)
def write_size (self, url_data):
def write_size(self, url_data):
"""Write url_data.size."""
self.write(self.part("dlsize") + self.spaces("dlsize"))
self.writeln(strformat.strsize(url_data.size),
color=self.colordlsize)
def write_checktime (self, url_data):
def write_checktime(self, url_data):
"""Write url_data.checktime."""
self.write(self.part("checktime") + self.spaces("checktime"))
self.writeln(_("%.3f seconds") % url_data.checktime,
color=self.colordltime)
def write_info (self, url_data):
def write_info(self, url_data):
"""Write url_data.info."""
self.write(self.part("info") + self.spaces("info"))
self.writeln(self.wrap(url_data.info, 65), color=self.colorinfo)
@ -200,13 +200,13 @@ class TextLogger (_Logger):
self.write(self.part("modified") + self.spaces("modified"))
self.writeln(self.format_modified(url_data.modified))
def write_warning (self, url_data):
def write_warning(self, url_data):
"""Write url_data.warning."""
self.write(self.part("warning") + self.spaces("warning"))
warning_msgs = ["[%s] %s" % x for x in url_data.warnings]
self.writeln(self.wrap(warning_msgs, 65), color=self.colorwarning)
def write_result (self, url_data):
def write_result(self, url_data):
"""Write url_data.result."""
self.write(self.part("result") + self.spaces("result"))
if url_data.valid:
@ -219,7 +219,7 @@ class TextLogger (_Logger):
self.write(": " + url_data.result, color=color)
self.writeln()
def write_outro (self, interrupt=False):
def write_outro(self, interrupt=False):
"""Write end of checking message."""
self.writeln()
if interrupt:
@ -264,7 +264,7 @@ class TextLogger (_Logger):
{"time": strformat.strtime(self.stoptime),
"duration": strformat.strduration_long(duration)})
def write_stats (self):
def write_stats(self):
"""Write check statistic info."""
self.writeln()
self.writeln(_("Statistics:"))
@ -282,7 +282,7 @@ class TextLogger (_Logger):
else:
self.writeln(_("No statistics available since no URLs were checked."))
def end_output (self, **kwargs):
def end_output(self, **kwargs):
"""Write end of output info, and flush all output buffers."""
self.stats.downloaded_bytes = kwargs.get("downloaded_bytes")
self.stats.num_urls = kwargs.get("num_urls")

View file

@ -29,24 +29,24 @@ xmlattr_entities = {
}
def xmlquote (s):
def xmlquote(s):
"""
Quote characters for XML.
"""
return xml.sax.saxutils.escape(s)
def xmlquoteattr (s):
def xmlquoteattr(s):
"""
Quote XML attribute, ready for inclusion with double quotes.
"""
return xml.sax.saxutils.escape(s, xmlattr_entities)
class _XMLLogger (_Logger):
class _XMLLogger(_Logger):
"""Base class for XML output; easy to parse with any XML tool."""
def __init__ (self, **kwargs):
def __init__(self, **kwargs):
""" Initialize graph node list and internal id counter. """
args = self.get_args(kwargs)
super(_XMLLogger, self).__init__(**args)
@ -54,7 +54,7 @@ class _XMLLogger (_Logger):
self.indent = " "
self.level = 0
def comment (self, s, **args):
def comment(self, s, **args):
"""
Write XML comment.
"""
@ -62,7 +62,7 @@ class _XMLLogger (_Logger):
self.write(s, **args)
self.writeln(" -->")
def xml_start_output (self):
def xml_start_output(self):
"""
Write start of checking info as xml comment.
"""
@ -72,14 +72,14 @@ class _XMLLogger (_Logger):
self.write_intro()
self.writeln()
def xml_end_output (self):
def xml_end_output(self):
"""
Write end of checking info as xml comment.
"""
if self.has_part("outro"):
self.write_outro()
def xml_starttag (self, name, attrs=None):
def xml_starttag(self, name, attrs=None):
"""
Write XML start tag.
"""
@ -92,7 +92,7 @@ class _XMLLogger (_Logger):
self.writeln(">")
self.level += 1
def xml_endtag (self, name):
def xml_endtag(self, name):
"""
Write XML end tag.
"""
@ -101,7 +101,7 @@ class _XMLLogger (_Logger):
self.write(self.indent*self.level)
self.writeln("</%s>" % xmlquote(name))
def xml_tag (self, name, content, attrs=None):
def xml_tag(self, name, content, attrs=None):
"""
Write XML tag with content.
"""

View file

@ -35,7 +35,7 @@ _proc_status = '/proc/%d/status' % os.getpid()
_scale = {'kB': 1024.0, 'mB': 1024.0*1024.0,
'KB': 1024.0, 'MB': 1024.0*1024.0}
def _VmB (VmKey):
def _VmB(VmKey):
"""Parse /proc/<pid>/status file for given key.
@return: requested number value of status entry
@ -62,7 +62,7 @@ def _VmB (VmKey):
return float(v[1]) * _scale[v[2]]
def memory (since=0.0):
def memory(since=0.0):
"""Get memory usage.
@return: memory usage in bytes
@ -71,7 +71,7 @@ def memory (since=0.0):
return _VmB('VmSize:') - since
def resident (since=0.0):
def resident(since=0.0):
"""Get resident memory usage.
@return: resident memory usage in bytes
@ -80,7 +80,7 @@ def resident (since=0.0):
return _VmB('VmRSS:') - since
def stacksize (since=0.0):
def stacksize(since=0.0):
"""Get stack size.
@return: stack size in bytes
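Editor's note, for illustration only (not part of the diff): the data format _VmB() parses. A line in /proc/<pid>/status looks like "VmSize:   123456 kB", and the _scale table above converts that to bytes. A minimal standalone sketch (Linux only):

with open("/proc/self/status") as fd:
    for line in fd:
        if line.startswith("VmSize:"):
            # e.g. ['VmSize:', '123456', 'kB'] -> 123456 * 1024.0 bytes
            parts = line.split()
            print(float(parts[1]) * 1024.0)
            break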

View file

@ -59,7 +59,7 @@ PARSE_CONTENTS = {
"application/xml+sitemap": re.compile(r'<\?xml[^<]+<urlset\s+', re.IGNORECASE),
}
def guess_mimetype (filename, read=None):
def guess_mimetype(filename, read=None):
"""Return MIME type of file, or 'application/octet-stream' if it could
not be determined."""
mime, encoding = None, None

View file

@ -22,7 +22,7 @@ import re
import socket
from .. import log, LOG_CHECK
def is_valid_ip (ip):
def is_valid_ip(ip):
"""
Return True if given ip is a valid IPv4 or IPv6 address.
"""
@ -33,7 +33,7 @@ def is_valid_ip (ip):
return True
def resolve_host (host):
def resolve_host(host):
"""
@host: hostname or IP address
Return list of ip numbers for given host.

View file

@ -41,35 +41,35 @@ def parse_url(url_data):
url_data.aggregate.plugin_manager.run_parser_plugins(url_data, pagetype=key)
def parse_html (url_data):
def parse_html(url_data):
"""Parse into HTML content and search for URLs to check.
Found URLs are added to the URL queue.
"""
linkparse.find_links(url_data.get_soup(), url_data.add_url, linkparse.LinkTags)
def parse_opera (url_data):
def parse_opera(url_data):
"""Parse an opera bookmark file."""
from ..bookmarks.opera import parse_bookmark_data
for url, name, lineno in parse_bookmark_data(url_data.get_content()):
url_data.add_url(url, line=lineno, name=name)
def parse_chromium (url_data):
def parse_chromium(url_data):
"""Parse a Chromium or Google Chrome bookmark file."""
from ..bookmarks.chromium import parse_bookmark_data
for url, name in parse_bookmark_data(url_data.get_content()):
url_data.add_url(url, name=name)
def parse_safari (url_data):
def parse_safari(url_data):
"""Parse a Safari bookmark file."""
from ..bookmarks.safari import parse_bookmark_data
for url, name in parse_bookmark_data(url_data.get_raw_content()):
url_data.add_url(url, name=name)
def parse_text (url_data):
def parse_text(url_data):
"""Parse a text file with one url per line; comment and blank
lines are ignored."""
lineno = 0
@ -81,7 +81,7 @@ def parse_text (url_data):
url_data.add_url(line, line=lineno)
def parse_css (url_data):
def parse_css(url_data):
"""
Parse a CSS file for url() patterns.
"""
@ -96,7 +96,7 @@ def parse_css (url_data):
url_data.add_url(url, line=lineno, column=column)
def parse_swf (url_data):
def parse_swf(url_data):
"""Parse a SWF file for URLs."""
linkfinder = linkparse.swf_url_re.finditer
for mo in linkfinder(url_data.get_raw_content()):
@ -107,14 +107,14 @@ def parse_swf (url_data):
url_data.add_url(url)
def parse_wml (url_data):
def parse_wml(url_data):
"""Parse into WML content and search for URLs to check.
Found URLs are added to the URL queue.
"""
linkparse.find_links(url_data.get_soup(), url_data.add_url, linkparse.WmlTags)
def parse_firefox (url_data):
def parse_firefox(url_data):
"""Parse a Firefox3 bookmark file."""
filename = url_data.get_os_filename()
for url, name in firefox.parse_bookmark_file(filename):

View file

@ -39,7 +39,7 @@ class AnchorCheck(_ContentPlugin):
linkparse.AnchorTags)
self.check_anchor(url_data)
def add_anchor (self, url, line, column, name, base):
def add_anchor(self, url, line, column, name, base):
"""Add anchor URL."""
self.anchors.append((url, line, column, name, base))

View file

@ -50,7 +50,7 @@ class LocationInfo(_ConnectionPlugin):
# no risks should be taken here by using a lock.
_lock = get_lock("geoip")
def get_geoip_dat ():
def get_geoip_dat():
"""Find a GeoIP database, preferring city over country lookup."""
datafiles = ("GeoIPCity.dat", "GeoIP.dat")
if os.name == 'nt':
@ -85,7 +85,7 @@ if geoip_dat:
@synchronized(_lock)
def get_location (host):
def get_location(host):
"""Get translated country and optional city name.
@return: country with optional city or a boolean False if not found

View file

@ -29,7 +29,7 @@ from .. import fileutil, log, LOG_PLUGIN
_initialized = False
def init_win32com ():
def init_win32com():
"""Initialize the win32com.client cache."""
global _initialized
if _initialized:
@ -47,7 +47,7 @@ def init_win32com ():
_initialized = True
def has_word ():
def has_word():
"""Determine if Word is available on the current system."""
if not has_win32com:
return False
@ -64,13 +64,13 @@ def has_word ():
return False
def constants (name):
def constants(name):
"""Helper to return constants. Avoids importing win32com.client in
other modules."""
return getattr(win32com.client.constants, name)
def get_word_app ():
def get_word_app():
"""Return open Word.Application handle, or None if Word is not available
on this system."""
if not has_word():
@ -84,18 +84,18 @@ def get_word_app ():
return app
def close_word_app (app):
def close_word_app(app):
"""Close Word application object."""
app.Quit()
def open_wordfile (app, filename):
def open_wordfile(app, filename):
"""Open given Word file with application object."""
return app.Documents.Open(filename, ReadOnly=True,
AddToRecentFiles=False, Visible=False, NoEncodingDialog=True)
def close_wordfile (doc):
def close_wordfile(doc):
"""Close word file."""
doc.Close()
@ -155,7 +155,7 @@ def get_line_number(doc, wrange):
return lineno
def get_temp_filename (content):
def get_temp_filename(content):
"""Get temporary filename for content to parse."""
# store content in temporary file
fd, filename = fileutil.get_temp_file(mode='wb', suffix='.doc',

View file

@ -36,7 +36,7 @@ class W3Timer:
self.last_w3_call = 0
@synchronized(_w3_time_lock)
def check_w3_time (self):
def check_w3_time(self):
"""Make sure the W3C validators are at most called once a second."""
if time.time() - self.last_w3_call < W3Timer.SleepSeconds:
time.sleep(W3Timer.SleepSeconds)
@ -111,7 +111,7 @@ class CssSyntaxCheck(_ContentPlugin):
log.warn(LOG_PLUGIN, _("CSS syntax check plugin error: %(msg)s ") % {"msg": msg})
def check_w3_errors (url_data, xml, w3type):
def check_w3_errors(url_data, xml, w3type):
"""Add warnings for W3C HTML or CSS errors in xml format.
w3type is either "W3C HTML" or "W3C CSS"."""
dom = parseString(xml)
@ -126,7 +126,7 @@ def check_w3_errors (url_data, xml, w3type):
url_data.add_warning(warnmsg % attrs)
def getXmlText (parent, tag):
def getXmlText(parent, tag):
"""Return XML content of given tag in parent element."""
elem = parent.getElementsByTagName(tag)[0]
# Yes, the DOM standard is awful.

View file

@ -65,7 +65,7 @@ class VirusCheck(_ContentPlugin):
return config
class ClamavError (Exception):
class ClamavError(Exception):
"""Raised on clamav errors."""
pass
@ -73,7 +73,7 @@ class ClamavError (Exception):
class ClamdScanner:
"""Virus scanner using a clamd daemon process."""
def __init__ (self, clamav_conf):
def __init__(self, clamav_conf):
"""Initialize clamd daemon process sockets."""
self.infected = []
self.errors = []
@ -82,7 +82,7 @@ class ClamdScanner:
self.sock.getsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF)
self.wsock = self.new_scansock()
def new_scansock (self):
def new_scansock(self):
"""Return a connected socket for sending scan data to it."""
port = None
try:
@ -108,11 +108,11 @@ class ClamdScanner:
raise
return wsock
def scan (self, data):
def scan(self, data):
"""Scan given data for viruses."""
self.wsock.sendall(data)
def close (self):
def close(self):
"""Get results and close clamd daemon sockets."""
self.wsock.close()
data = self.sock.recv(self.sock_rcvbuf)
@ -125,7 +125,7 @@ class ClamdScanner:
self.sock.close()
def canonical_clamav_conf ():
def canonical_clamav_conf():
"""Default clamav configs for various platforms."""
if os.name == 'posix':
clamavconf = "/etc/clamav/clamd.conf"
@ -143,16 +143,16 @@ def get_clamav_conf(filename):
log.warn(LOG_PLUGIN, "No ClamAV config file found at %r.", filename)
def get_sockinfo (host, port=None):
def get_sockinfo(host, port=None):
"""Return socket.getaddrinfo for given host and port."""
family, socktype = socket.AF_INET, socket.SOCK_STREAM
return socket.getaddrinfo(host, port, family, socktype)
class ClamavConfig (dict):
class ClamavConfig(dict):
"""Clamav configuration wrapper, with clamd connection method."""
def __init__ (self, filename):
def __init__(self, filename):
"""Parse clamav configuration file."""
super(ClamavConfig, self).__init__()
self.parseconf(filename)
@ -161,7 +161,7 @@ class ClamavConfig (dict):
if self.get('TCPSocket') and self.get('LocalSocket'):
raise ClamavError(_("only one of TCPSocket and LocalSocket must be enabled"))
def parseconf (self, filename):
def parseconf(self, filename):
"""Parse clamav configuration from given file."""
with open(filename) as fd:
# yet another config format, sigh
@ -176,7 +176,7 @@ class ClamavConfig (dict):
else:
self[split[0]] = split[1]
def new_connection (self):
def new_connection(self):
"""Connect to clamd for stream scanning.
@return: tuple (connected socket, host)
@ -191,7 +191,7 @@ class ClamavConfig (dict):
raise ClamavError(_("one of TCPSocket or LocalSocket must be enabled"))
return sock, host
def create_local_socket (self):
def create_local_socket(self):
"""Create local socket, connect to it and return socket object."""
sock = create_socket(socket.AF_UNIX, socket.SOCK_STREAM)
addr = self['LocalSocket']
@ -202,7 +202,7 @@ class ClamavConfig (dict):
raise
return sock
def create_tcp_socket (self, host):
def create_tcp_socket(self, host):
"""Create tcp socket, connect to it and return socket object."""
port = int(self['TCPSocket'])
sockinfo = get_sockinfo(host, port=port)
@ -215,7 +215,7 @@ class ClamavConfig (dict):
return sock
def scan (data, clamconf):
def scan(data, clamconf):
"""Scan data for viruses.
@return (infection msgs, errors)
@rtype ([], [])

View file

@ -35,7 +35,7 @@ class RobotFileParser:
"""This class provides a set of methods to read, parse and answer
questions about a single robots.txt file."""
def __init__ (self, url='', session=None, proxies=None, auth=None):
def __init__(self, url='', session=None, proxies=None, auth=None):
"""Initialize internal entry lists and store given url and
credentials."""
self.set_url(url)
@ -47,7 +47,7 @@ class RobotFileParser:
self.auth = auth
self._reset()
def _reset (self):
def _reset(self):
"""Reset internal flags and entry lists."""
self.entries = []
self.default_entry = None
@ -58,7 +58,7 @@ class RobotFileParser:
self.sitemap_urls = []
self.encoding = None
def mtime (self):
def mtime(self):
"""Returns the time the robots.txt file was last fetched.
This is useful for long-running web spiders that need to
@ -69,17 +69,17 @@ class RobotFileParser:
"""
return self.last_checked
def modified (self):
def modified(self):
"""Set the time the robots.txt file was last fetched to the
current time."""
self.last_checked = time.time()
def set_url (self, url):
def set_url(self, url):
"""Set the URL referring to a robots.txt file."""
self.url = url
self.host, self.path = urllib.parse.urlparse(url)[1:3]
def read (self):
def read(self):
"""Read the robots.txt URL and feeds it to the parser."""
self._reset()
kwargs = dict(
@ -116,7 +116,7 @@ class RobotFileParser:
self.allow_all = True
log.debug(LOG_CHECK, "%r allow all (request error)", self.url)
def _add_entry (self, entry):
def _add_entry(self, entry):
"""Add a parsed entry to entry list.
@return: None
@ -127,7 +127,7 @@ class RobotFileParser:
else:
self.entries.append(entry)
def parse (self, lines):
def parse(self, lines):
"""Parse the input lines from a robot.txt file.
We allow that a user-agent: line is not preceded by
one or more blank lines.
@ -210,7 +210,7 @@ class RobotFileParser:
self.modified()
log.debug(LOG_CHECK, "Parsed rules:\n%s", str(self))
def can_fetch (self, useragent, url):
def can_fetch(self, useragent, url):
"""Using the parsed robots.txt decide if useragent can fetch url.
@return: True if agent can fetch url, else False
@ -240,7 +240,7 @@ class RobotFileParser:
log.debug(LOG_CHECK, " ... agent not found, allow.")
return True
def get_crawldelay (self, useragent):
def get_crawldelay(self, useragent):
"""Look for a configured crawl delay.
@return: crawl delay in seconds or zero
@ -251,7 +251,7 @@ class RobotFileParser:
return entry.crawldelay
return 0
def __str__ (self):
def __str__(self):
"""Constructs string representation, usable as contents of a
robots.txt file.
@ -269,7 +269,7 @@ class RuleLine:
(allowance==0) followed by a path.
"""
def __init__ (self, path, allowance):
def __init__(self, path, allowance):
"""Initialize with given path and allowance info."""
if path == '' and not allowance:
# an empty value means allow all
@ -278,7 +278,7 @@ class RuleLine:
self.path = urllib.parse.quote(path)
self.allowance = allowance
def applies_to (self, path):
def applies_to(self, path):
"""Look if given path applies to this rule.
@return: True if pathname applies to this rule, else False
@ -286,7 +286,7 @@ class RuleLine:
"""
return self.path == "*" or path.startswith(self.path)
def __str__ (self):
def __str__(self):
"""Construct string representation in robots.txt format.
@return: robots.txt format
@ -298,13 +298,13 @@ class RuleLine:
class Entry:
"""An entry has one or more user-agents and zero or more rulelines."""
def __init__ (self):
def __init__(self):
"""Initialize user agent and rule list."""
self.useragents = []
self.rulelines = []
self.crawldelay = 0
def __str__ (self):
def __str__(self):
"""string representation in robots.txt format.
@return: robots.txt format
@ -316,7 +316,7 @@ class Entry:
lines.extend([str(line) for line in self.rulelines])
return "\n".join(lines)
def applies_to (self, useragent):
def applies_to(self, useragent):
"""Check if this entry applies to the specified agent.
@return: True if this entry applies to the agent, else False.
@ -333,7 +333,7 @@ class Entry:
return True
return False
def allowance (self, filename):
def allowance(self, filename):
"""Preconditions:
- our agent applies to this entry
- filename is URL decoded
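Editor's note, for illustration only (not part of the diff): feeding lines to the parser above via parse() and querying it. The import path is an assumption (this hunk does not show the module name); the printed values are expectations based on the docstrings, not verified output.

# assumption: module path; only the file contents are visible in this diff
from linkcheck.robotparser2 import RobotFileParser

lines = [
    "User-agent: *",
    "Disallow: /private/",
    "Crawl-delay: 2",
]
rp = RobotFileParser()
rp.parse(lines)
print(rp.can_fetch("LinkChecker", "http://example.com/index.html"))  # expected: True
print(rp.can_fetch("LinkChecker", "http://example.com/private/x"))   # expected: False
print(rp.get_crawldelay("LinkChecker"))                              # expected: 2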

View file

@ -34,7 +34,7 @@ if socket.has_ipv6:
raise
def create_socket (family, socktype, proto=0, timeout=60):
def create_socket(family, socktype, proto=0, timeout=60):
"""
Create a socket with given family and type. If an SSL context
is given, an SSL socket is created.

View file

@ -40,7 +40,7 @@ from . import i18n
from builtins import str as str_text
def unicode_safe (s, encoding=i18n.default_encoding, errors='replace'):
def unicode_safe(s, encoding=i18n.default_encoding, errors='replace'):
"""Get unicode string without raising encoding errors. Unknown
characters of the given encoding will be ignored.
@ -59,7 +59,7 @@ def unicode_safe (s, encoding=i18n.default_encoding, errors='replace'):
return str(s)
def ascii_safe (s):
def ascii_safe(s):
"""Get ASCII string without raising encoding errors. Unknown
characters of the given encoding will be ignored.
@ -73,7 +73,7 @@ def ascii_safe (s):
return s
def is_ascii (s):
def is_ascii(s):
"""Test if a string can be encoded in ASCII."""
try:
s.encode('ascii', 'strict')
@ -82,7 +82,7 @@ def is_ascii (s):
return False
def is_encoding (text):
def is_encoding(text):
"""Check if string is a valid encoding."""
try:
return codecs.lookup(text)
@ -90,12 +90,12 @@ def is_encoding (text):
return False
def url_unicode_split (url):
def url_unicode_split(url):
"""Like urllib.parse.urlsplit(), but always returning unicode parts."""
return [unicode_safe(s) for s in urllib.parse.urlsplit(url)]
def unquote (s, matching=False):
def unquote(s, matching=False):
"""Remove leading and ending single and double quotes.
The quotes need to match if matching is True. Only one quote from each
end will be stripped.
@ -124,7 +124,7 @@ _para_posix = r"(?:%(sep)s)(?:(?:%(sep)s)\s*)+" % {'sep': '\n'}
_para_win = r"(?:%(sep)s)(?:(?:%(sep)s)\s*)+" % {'sep': '\r\n'}
_para_ro = re.compile("%s|%s|%s" % (_para_mac, _para_posix, _para_win))
def get_paragraphs (text):
def get_paragraphs(text):
"""A new paragraph is considered to start at a line which follows
one or more blank lines (lines containing nothing or just spaces).
The first line of the text also starts a paragraph."""
@ -133,7 +133,7 @@ def get_paragraphs (text):
return _para_ro.split(text)
def wrap (text, width, **kwargs):
def wrap(text, width, **kwargs):
"""Adjust lines of text to be not longer than width. The text will be
returned unmodified if width <= 0.
See textwrap.wrap() for a list of supported kwargs.
@ -147,13 +147,13 @@ def wrap (text, width, **kwargs):
return os.linesep.join(ret)
def indent (text, indent_string=" "):
def indent(text, indent_string=" "):
"""Indent each line of text with the given indent string."""
return os.linesep.join("%s%s" % (indent_string, x)
for x in text.splitlines())
def get_line_number (s, index):
def get_line_number(s, index):
r"""Return the line number of s[index] or zero on errors.
Lines are assumed to be separated by the ASCII character '\n'."""
i = 0
@ -167,14 +167,14 @@ def get_line_number (s, index):
return line
def paginate (text):
def paginate(text):
"""Print text in pages of lines."""
pydoc.pager(text)
_markup_re = re.compile("<.*?>", re.DOTALL)
def remove_markup (s):
def remove_markup(s):
"""Remove all <*> html markup tags from s."""
mo = _markup_re.search(s)
while mo:
@ -183,7 +183,7 @@ def remove_markup (s):
return s
def strsize (b, grouping=True):
def strsize(b, grouping=True):
"""Return human representation of bytes b. A negative number of bytes
raises a value error."""
if b < 0:
@ -203,13 +203,13 @@ def strsize (b, grouping=True):
return "%sGB" % locale.format_string("%.1f", (float(b) / (1024*1024*1024)), grouping)
def strtime (t, func=time.localtime):
def strtime(t, func=time.localtime):
"""Return ISO 8601 formatted time."""
return time.strftime("%Y-%m-%d %H:%M:%S", func(t)) + strtimezone()
# from quodlibet
def strduration (duration):
def strduration(duration):
"""Turn a time value in seconds into hh:mm:ss or mm:ss."""
if duration < 0:
duration = abs(duration)
@ -229,7 +229,7 @@ def strduration (duration):
# from quodlibet
def strduration_long (duration, do_translate=True):
def strduration_long(duration, do_translate=True):
"""Turn a time value in seconds into x hours, x minutes, etc."""
if do_translate:
# use global translator functions
@ -275,7 +275,7 @@ def strduration_long (duration, do_translate=True):
return "%s%s" % (prefix, ", ".join(time_str))
def strtimezone ():
def strtimezone():
"""Return timezone info, %z on some platforms, but not supported on all.
"""
if time.daylight:
@ -293,7 +293,7 @@ def stripurl(s):
return s.splitlines()[0].strip()
def limit (s, length=72):
def limit(s, length=72):
"""If the length of the string exceeds the given limit, it will be cut
off and three dots will be appended.
@ -311,12 +311,12 @@ def limit (s, length=72):
return "%s..." % s[:length]
def strline (s):
def strline(s):
"""Display string representation on one line."""
return strip_control_chars("`%s'" % s.replace("\n", "\\n"))
def format_feature_warning (**kwargs):
def format_feature_warning(**kwargs):
"""Format warning that a module could not be imported and that it should
be installed for a certain URL.
"""

View file

@ -19,19 +19,19 @@ Support for managing threads.
import threading
class StoppableThread (threading.Thread):
class StoppableThread(threading.Thread):
"""Thread class with a stop() method. The thread itself has to check
regularly for the stopped() condition."""
def __init__ (self):
def __init__(self):
"""Store stop event."""
super(StoppableThread, self).__init__()
self._stopper = threading.Event()
def stop (self):
def stop(self):
"""Set stop event."""
self._stopper.set()
def stopped (self, timeout=None):
def stopped(self, timeout=None):
"""Return True if stop event is set."""
return self._stopper.wait(timeout)
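Editor's note, for illustration only (not part of the diff): the usage pattern the class docstring above asks for, where the thread body polls stopped(). The class is restated verbatim from the hunk so the sketch runs standalone.

import threading
import time

class StoppableThread(threading.Thread):
    """As in the hunk above: stop() sets an event, stopped() waits on it."""
    def __init__(self):
        super(StoppableThread, self).__init__()
        self._stopper = threading.Event()

    def stop(self):
        self._stopper.set()

    def stopped(self, timeout=None):
        return self._stopper.wait(timeout)

class Worker(StoppableThread):
    def run(self):
        # check the stop condition regularly, as the docstring requires
        while not self.stopped(timeout=0.1):
            pass  # one unit of work per iteration

w = Worker()
w.start()
time.sleep(0.3)
w.stop()   # sets the event; stopped() returns True and run() exits
w.join()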

View file

@ -24,7 +24,7 @@ _trace_ignore = set()
_trace_filter = set()
def trace_ignore (names):
def trace_ignore(names):
"""Add given names to trace ignore set, or clear set if names is None."""
if names is None:
_trace_ignore.clear()
@ -32,7 +32,7 @@ def trace_ignore (names):
_trace_ignore.update(names)
def trace_filter (patterns):
def trace_filter(patterns):
"""Add given patterns to trace filter set or clear set if patterns is
None."""
if patterns is None:
@ -41,7 +41,7 @@ def trace_filter (patterns):
_trace_filter.update(re.compile(pat) for pat in patterns)
def _trace (frame, event, arg):
def _trace(frame, event, arg):
"""Trace function calls."""
if event in ('call', 'c_call'):
_trace_line(frame, event, arg)
@ -53,7 +53,7 @@ def _trace (frame, event, arg):
return _trace
def _trace_full (frame, event, arg):
def _trace_full(frame, event, arg):
"""Trace every executed line."""
if event == "line":
_trace_line(frame, event, arg)
@ -62,7 +62,7 @@ def _trace_full (frame, event, arg):
return _trace_full
def _trace_line (frame, event, arg):
def _trace_line(frame, event, arg):
"""Print current executed line."""
name = frame.f_globals["__name__"]
if name in _trace_ignore:
@ -82,7 +82,7 @@ def _trace_line (frame, event, arg):
print("THREAD(%d) %r %.2f %s # %s:%d" % args)
def trace_on (full=False):
def trace_on(full=False):
"""Start tracing of the current thread (and the current thread only)."""
if full:
sys.settrace(_trace_full)
@ -90,6 +90,6 @@ def trace_on (full=False):
sys.settrace(_trace)
def trace_off ():
def trace_off():
"""Stop tracing of the current thread (and the current thread only)."""
sys.settrace(None)

View file

@ -32,7 +32,7 @@ else:
URL_TAG = 'Source-Package-URL:'
def check_update ():
def check_update():
"""Return the following values:
(False, errmsg) - online version could not be determined
(True, None) - user has newest version
@ -53,7 +53,7 @@ def check_update ():
return True, (version, None)
def get_online_version ():
def get_online_version():
"""Download update info and parse it."""
# prevent getting a cached answer
headers = {'Pragma': 'no-cache', 'Cache-Control': 'no-cache'}
@ -70,6 +70,6 @@ def get_online_version ():
return version, url
def is_newer_version (version):
def is_newer_version(version):
"""Check if given version is newer than current version."""
return StrictVersion(version) > StrictVersion(CurrentVersion)
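Editor's note, for illustration only (not part of the diff): the comparison is_newer_version() performs, using distutils' StrictVersion.

from distutils.version import StrictVersion

print(StrictVersion("9.4") > StrictVersion("9.3"))   # True
print(StrictVersion("9.4") > StrictVersion("9.4"))   # False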

View file

@ -87,7 +87,7 @@ is_safe_fragment = re.compile("(?i)^%s$" % _safe_fragment_pattern).match
# snatched from urlparse.py
def splitparams (path):
def splitparams(path):
"""Split off parameter part from path.
Returns tuple (path-without-param, param)
"""
@ -100,7 +100,7 @@ def splitparams (path):
return path[:i], path[i+1:]
def is_numeric_port (portstr):
def is_numeric_port(portstr):
"""return: integer port (== True) iff portstr is a valid port number,
False otherwise
"""
@ -112,13 +112,13 @@ def is_numeric_port (portstr):
return False
def safe_host_pattern (host):
def safe_host_pattern(host):
"""Return regular expression pattern with given host for URL testing."""
return "(?i)%s://%s%s(#%s)?" % \
(_safe_scheme_pattern, host, _safe_path_pattern, _safe_fragment_pattern)
def parse_qsl (qs, encoding, keep_blank_values=0, strict_parsing=0):
def parse_qsl(qs, encoding, keep_blank_values=0, strict_parsing=0):
"""Parse a query given as a string argument.
@param qs: URL-encoded query string to be parsed
@ -168,7 +168,7 @@ def parse_qsl (qs, encoding, keep_blank_values=0, strict_parsing=0):
return r
def idna_encode (host):
def idna_encode(host):
"""Encode hostname as internationalized domain name (IDN) according
to RFC 3490.
@raise: UnicodeError if hostname is not properly IDN encoded.
@ -183,7 +183,7 @@ def idna_encode (host):
return host, False
def url_fix_host (urlparts, encoding):
def url_fix_host(urlparts, encoding):
"""Unquote and fix hostname. Returns is_idn."""
if not urlparts[1]:
urlparts[2] = urllib.parse.unquote(urlparts[2], encoding=encoding)
@ -229,7 +229,7 @@ def url_fix_host (urlparts, encoding):
return is_idn
def url_fix_common_typos (url):
def url_fix_common_typos(url):
"""Fix common typos in given URL like forgotten colon."""
if url.startswith("http//"):
url = "http://" + url[6:]
@ -238,7 +238,7 @@ def url_fix_common_typos (url):
return url
def url_fix_mailto_urlsplit (urlparts):
def url_fix_mailto_urlsplit(urlparts):
"""Split query part of mailto url if found."""
sep = b"?" if isinstance(urlparts[2], bytes) else "?"
if sep in urlparts[2]:
@ -252,7 +252,7 @@ wayback_regex = re.compile(r'(https?)(\%3A/|:/)')
def url_fix_wayback_query(path):
return wayback_regex.sub(r'\1://', path)
def url_parse_query (query, encoding):
def url_parse_query(query, encoding):
"""Parse and re-join the given CGI query."""
# if ? is in the query, split it off, seen at msdn.microsoft.com
append = ""
@ -273,7 +273,7 @@ def url_parse_query (query, encoding):
return ''.join(l) + append
def urlunsplit (urlparts):
def urlunsplit(urlparts):
"""Same as urllib.parse.urlunsplit but with extra UNC path handling
for Windows OS."""
res = urllib.parse.urlunsplit(urlparts)
@ -286,7 +286,7 @@ def urlunsplit (urlparts):
return res
def url_norm (url, encoding):
def url_norm(url, encoding):
"""Normalize the given URL which must be quoted. Supports unicode
hostnames (IDNA encoding) according to RFC 3490.
@ -335,7 +335,7 @@ _thisdir_ro = re.compile(r"^\./")
_samedir_ro = re.compile(r"/\./|/\.$")
_parentdir_ro = re.compile(r"^/(\.\./)+|/(?!\.\./)[^/]+/\.\.(/|$)")
_relparentdir_ro = re.compile(r"^(?!\.\./)[^/]+/\.\.(/|$)")
def collapse_segments (path):
def collapse_segments(path):
"""Remove all redundant segments from the given URL path.
Precondition: path is an unquoted url path"""
# replace backslashes
@ -371,7 +371,7 @@ def collapse_segments (path):
url_is_absolute = re.compile(r"^[-\.a-z]+:", re.I).match
def url_quote (url, encoding):
def url_quote(url, encoding):
"""Quote given URL."""
if not url_is_absolute(url):
return document_quote(url)
@ -393,7 +393,7 @@ def url_quote (url, encoding):
return urlunsplit(urlparts)
def document_quote (document):
def document_quote(document):
"""Quote given document."""
doc, query = urllib.parse.splitquery(document)
doc = urllib.parse.quote(doc, safe='/=,')
@ -402,7 +402,7 @@ def document_quote (document):
return doc
def match_url (url, domainlist):
def match_url(url, domainlist):
"""Return True if host part of url matches an entry in given domain list.
"""
if not url:
@ -410,7 +410,7 @@ def match_url (url, domainlist):
return match_host(url_split(url)[1], domainlist)
def match_host (host, domainlist):
def match_host(host, domainlist):
"""Return True if host matches an entry in given domain list."""
if not host:
return False
@ -428,7 +428,7 @@ if os.name == 'nt':
_nopathquote_chars += "|"
_safe_url_chars = re.escape(_nopathquote_chars + "_:.&#%?[]!")+"a-zA-Z0-9"
_safe_url_chars_ro = re.compile(r"^[%s]*$" % _safe_url_chars)
def url_needs_quoting (url):
def url_needs_quoting(url):
"""Check if url needs percent quoting. Note that the method does
only check basic character sets, and not any other syntax.
The URL might still be syntactically incorrect even when
@ -441,7 +441,7 @@ def url_needs_quoting (url):
return not _safe_url_chars_ro.match(url)
def url_split (url):
def url_split(url):
"""Split url in a tuple (scheme, hostname, port, document) where
hostname is always lowercased.
Precondition: url is syntactically correct URI (eg has no whitespace)
@ -455,14 +455,14 @@ def url_split (url):
return scheme, host, port, document
def url_unsplit (parts):
def url_unsplit(parts):
"""Rejoin URL parts to a string."""
if parts[2] == default_ports.get(parts[0]):
return "%s://%s%s" % (parts[0], parts[1], parts[3])
return "%s://%s:%d%s" % parts
def splitport (host, port=0):
def splitport(host, port=0):
"""Split optional port number from host. If host has no port number,
the given default port is returned.
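Editor's note, for illustration only (not part of the diff): url_split()/url_unsplit() round-tripping as described by the docstrings above. The import mirrors the "from .. import url as urlutil" line earlier in this diff; the comments show expected values, not verified output.

from linkcheck import url as urlutil

parts = urlutil.url_split("http://Example.COM:8080/index.html")
print(parts)                          # expected: ('http', 'example.com', 8080, '/index.html')
print(urlutil.url_unsplit(parts))     # expected: http://example.com:8080/index.html
print(urlutil.is_numeric_port("80"))  # expected: 80 (a true value); False for non-numbers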

View file

@ -15,7 +15,7 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""Windows utility functions."""
def get_shell_folder (name):
def get_shell_folder(name):
"""Get Windows Shell Folder locations from the registry."""
try:
import _winreg as winreg

View file

@ -196,7 +196,7 @@ file entry:
Epilog = "\n".join((Examples, LoggerTypes, RegularExpressions, CookieFormat, ProxySupport, Notes, Retval, Warnings))
def has_encoding (encoding):
def has_encoding(encoding):
"""Detect if Python can encode in a certain encoding."""
try:
codecs.lookup(encoding)
@ -376,7 +376,7 @@ if has_argcomplete:
argcomplete.autocomplete(argparser)
def read_stdin_urls ():
def read_stdin_urls():
"""Read list of URLs, separated by white-space, from stdin."""
num = 0
while True:

View file

@ -23,7 +23,7 @@ import codecs
import html
from linkcheck import strformat
def main (filename):
def main(filename):
om = print_memorydump(filename)
dirname, basename = os.path.split(filename)
basename = os.path.splitext(basename)[0]

View file

@ -60,12 +60,12 @@ def get_long_description():
except:
return Description
def normpath (path):
def normpath(path):
"""Norm a path name to platform specific notation."""
return os.path.normpath(path)
def cnormpath (path):
def cnormpath(path):
"""Norm a path name to platform specific notation and make it absolute."""
path = normpath(path)
if os.name == 'nt':
@ -77,7 +77,7 @@ def cnormpath (path):
release_ro = re.compile(r"\(released (.+)\)")
def get_release_date ():
def get_release_date():
"""Parse and return relase date as string from doc/changelog.txt."""
fname = os.path.join("doc", "changelog.txt")
release_date = "unknown"
@ -95,10 +95,10 @@ def get_portable():
return os.environ.get('LINKCHECKER_PORTABLE', '0')
class MyInstallLib (install_lib):
class MyInstallLib(install_lib):
"""Custom library installation."""
def install (self):
def install(self):
"""Install the generated config file."""
outs = super(MyInstallLib, self).install()
infile = self.create_conf_file()
@ -107,7 +107,7 @@ class MyInstallLib (install_lib):
outs.append(outfile)
return outs
def create_conf_file (self):
def create_conf_file(self):
"""Create configuration file."""
cmd_obj = self.distribution.get_command_obj("install")
cmd_obj.ensure_finalized()
@ -141,11 +141,11 @@ class MyInstallLib (install_lib):
self.distribution.create_conf_file(data, directory=self.install_lib)
return self.get_conf_output()
def get_conf_output (self):
def get_conf_output(self):
"""Get name of configuration file."""
return self.distribution.get_conf_filename(self.install_lib)
def get_outputs (self):
def get_outputs(self):
"""Add the generated config file to the list of outputs."""
outs = super(MyInstallLib, self).get_outputs()
conf_output = self.get_conf_output()
@ -155,16 +155,16 @@ class MyInstallLib (install_lib):
return outs
class MyInstallData (install_data):
class MyInstallData(install_data):
"""Fix file permissions."""
def run (self):
def run(self):
"""Adjust permissions on POSIX systems."""
self.install_translations()
super(MyInstallData, self).run()
self.fix_permissions()
def install_translations (self):
def install_translations(self):
"""Install compiled gettext catalogs."""
# A hack to fix https://github.com/linkchecker/linkchecker/issues/102
i18n_files = []
@ -191,7 +191,7 @@ class MyInstallData (install_data):
(out, _) = self.copy_file(data, dest)
self.outfiles.append(out)
def fix_permissions (self):
def fix_permissions(self):
"""Set correct read permissions on POSIX systems. Might also
be possible by setting umask?"""
if os.name == 'posix' and not self.dry_run:
@ -205,15 +205,15 @@ class MyInstallData (install_data):
os.chmod(path, mode)
class MyDistribution (Distribution):
class MyDistribution(Distribution):
"""Custom distribution class generating config file."""
def __init__ (self, attrs):
def __init__(self, attrs):
"""Set console and windows scripts."""
super(MyDistribution, self).__init__(attrs)
self.console = ['linkchecker']
def run_commands (self):
def run_commands(self):
"""Generate config file and run commands."""
cwd = os.getcwd()
data = []
@ -223,11 +223,11 @@ class MyDistribution (Distribution):
self.create_conf_file(data)
super(MyDistribution, self).run_commands()
def get_conf_filename (self, directory):
def get_conf_filename(self, directory):
"""Get name for config file."""
return os.path.join(directory, "_%s_configdata.py" % self.get_name())
def create_conf_file (self, data, directory=None):
def create_conf_file(self, data, directory=None):
"""Create local config file from given data (list of lines) in
the directory (or current directory if not given)."""
data.insert(0, "# this file is automatically created by setup.py")
@ -253,7 +253,7 @@ class MyDistribution (Distribution):
"creating %s" % filename, self.verbose >= 1, self.dry_run)
def list_message_files (package, suffix=".mo"):
def list_message_files(package, suffix=".mo"):
"""Return list of all found message files and their installation paths."""
for fname in glob.glob("po/*" + suffix):
# basename (without extension) is a locale name
@ -263,7 +263,7 @@ def list_message_files (package, suffix=".mo"):
"share", "locale", localename, "LC_MESSAGES", domainname))
def check_manifest ():
def check_manifest():
"""Snatched from roundup.sf.net.
Check that the files listed in the MANIFEST are present when the
source is unpacked."""
@ -284,19 +284,19 @@ def check_manifest ():
print('\nMissing: '.join(err))
class MyBuild (build):
class MyBuild(build):
"""Custom build command."""
def run (self):
def run(self):
"""Check MANIFEST before building."""
check_manifest()
build.run(self)
class MyClean (clean):
class MyClean(clean):
"""Custom clean command."""
def run (self):
def run(self):
"""Remove share directory on clean."""
if self.all:
# remove share directory
@ -308,10 +308,10 @@ class MyClean (clean):
clean.run(self)
class MySdist (sdist):
class MySdist(sdist):
"""Custom sdist command."""
def get_file_list (self):
def get_file_list(self):
"""Add MANIFEST to the file list."""
super(MySdist, self).get_file_list()
self.filelist.append("MANIFEST")

View file

@ -29,7 +29,7 @@ basedir = os.path.dirname(__file__)
linkchecker_cmd = os.path.join(os.path.dirname(basedir), "linkchecker")
def run (cmd, verbosity=0, **kwargs):
def run(cmd, verbosity=0, **kwargs):
"""Run command without error checking.
@return: command return code"""
if kwargs.get("shell"):
@ -38,7 +38,7 @@ def run (cmd, verbosity=0, **kwargs):
return subprocess.call(cmd, **kwargs)
def run_checked (cmd, ret_ok=(0,), **kwargs):
def run_checked(cmd, ret_ok=(0,), **kwargs):
"""Run command and raise OSError on error."""
retcode = run(cmd, **kwargs)
if retcode not in ret_ok:
@ -48,7 +48,7 @@ def run_checked (cmd, ret_ok=(0,), **kwargs):
def run_silent (cmd):
def run_silent(cmd):
"""Run given command without output."""
null = open(os.name == 'nt' and ':NUL' or "/dev/null", 'w')
try:
@ -57,11 +57,11 @@ def run_silent (cmd):
null.close()
def _need_func (testfunc, name):
def _need_func(testfunc, name):
"""Decorator skipping test if given testfunc fails."""
def check_func (func):
def check_func(func):
@wraps(func)
def newfunc (*args, **kwargs):
def newfunc(*args, **kwargs):
if not testfunc():
pytest.skip("%s is not available" % name)
return func(*args, **kwargs)
@ -70,7 +70,7 @@ def _need_func (testfunc, name):
@lru_cache(1)
def has_network ():
def has_network():
"""Test if network is up."""
try:
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
@ -85,7 +85,7 @@ need_network = _need_func(has_network, "network")
@lru_cache(1)
def has_msgfmt ():
def has_msgfmt():
"""Test if msgfmt is available."""
return run_silent(["msgfmt", "-V"]) == 0
@ -93,7 +93,7 @@ need_msgfmt = _need_func(has_msgfmt, "msgfmt")
@lru_cache(1)
def has_posix ():
def has_posix():
"""Test if this is a POSIX system."""
return os.name == "posix"
@ -101,7 +101,7 @@ need_posix = _need_func(has_posix, "POSIX system")
@lru_cache(1)
def has_windows ():
def has_windows():
"""Test if this is a Windows system."""
return os.name == "nt"
@ -109,7 +109,7 @@ need_windows = _need_func(has_windows, "Windows system")
@lru_cache(1)
def has_linux ():
def has_linux():
"""Test if this is a Linux system."""
return sys.platform.startswith("linux")
@ -117,7 +117,7 @@ need_linux = _need_func(has_linux, "Linux system")
@lru_cache(1)
def has_clamav ():
def has_clamav():
"""Test if ClamAV daemon is installed and running."""
try:
cmd = ["grep", "LocalSocket", "/etc/clamav/clamd.conf"]
@ -135,7 +135,7 @@ need_clamav = _need_func(has_clamav, "ClamAV")
@lru_cache(1)
def has_proxy ():
def has_proxy():
"""Test if proxy is running on port 8081."""
try:
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
@ -149,7 +149,7 @@ need_proxy = _need_func(has_proxy, "proxy")
@lru_cache(1)
def has_pyftpdlib ():
def has_pyftpdlib():
"""Test if pyftpdlib is available."""
try:
import pyftpdlib
@ -161,7 +161,7 @@ need_pyftpdlib = _need_func(has_pyftpdlib, "pyftpdlib")
@lru_cache(1)
def has_biplist ():
def has_biplist():
"""Test if biplist is available."""
try:
import biplist
@ -173,7 +173,7 @@ need_biplist = _need_func(has_biplist, "biplist")
@lru_cache(1)
def has_newsserver (server):
def has_newsserver(server):
import nntplib
try:
nntp = nntplib.NNTP(server, usenetrc=False)
@ -183,10 +183,10 @@ def has_newsserver (server):
return False
def need_newsserver (server):
def need_newsserver(server):
"""Decorator skipping test if newsserver is not available."""
def check_func (func):
def newfunc (*args, **kwargs):
def check_func(func):
def newfunc(*args, **kwargs):
if not has_newsserver(server):
pytest.skip("Newsserver `%s' is not available" % server)
return func(*args, **kwargs)
@ -197,7 +197,7 @@ def need_newsserver (server):
@lru_cache(1)
def has_x11 ():
def has_x11():
"""Test if DISPLAY variable is set."""
return os.getenv('DISPLAY') is not None
@ -222,7 +222,7 @@ need_pdflib = _need_func(has_pdflib, 'pdflib')
@contextmanager
def _limit_time (seconds):
def _limit_time(seconds):
"""Raises LinkCheckerInterrupt if given number of seconds have passed."""
if os.name == 'posix':
def signal_handler(signum, frame):
@ -237,10 +237,10 @@ def _limit_time (seconds):
signal.signal(signal.SIGALRM, old_handler)
def limit_time (seconds, skip=False):
def limit_time(seconds, skip=False):
"""Limit test time to the given number of seconds, else fail or skip."""
def run_limited (func):
def new_func (*args, **kwargs):
def run_limited(func):
def new_func(*args, **kwargs):
try:
with _limit_time(seconds):
return func(*args, **kwargs)
@ -253,7 +253,7 @@ def limit_time (seconds, skip=False):
return run_limited
def get_file (filename=None):
def get_file(filename=None):
"""
Get file name located within 'data' directory.
"""

View file

@ -32,7 +32,7 @@ from builtins import str as str_text
get_url_from = linkcheck.checker.get_url_from
class TestLogger (linkcheck.logger._Logger):
class TestLogger(linkcheck.logger._Logger):
"""
Output logger for automatic regression tests.
"""
@ -53,7 +53,7 @@ class TestLogger (linkcheck.logger._Logger):
'url',
]
def __init__ (self, **kwargs):
def __init__(self, **kwargs):
"""
The kwargs must have "expected" keyword with the expected logger
output lines.
@ -81,13 +81,13 @@ class TestLogger (linkcheck.logger._Logger):
flags=re.DOTALL | re.MULTILINE)
if x])).splitlines()
def start_output (self):
def start_output(self):
"""
Nothing to do here.
"""
pass
def log_url (self, url_data):
def log_url(self, url_data):
"""
Append logger output to self.result.
"""
@ -131,7 +131,7 @@ class TestLogger (linkcheck.logger._Logger):
# note: do not append url_data.result since this is
# platform dependent
def end_output (self, linknumber=-1, **kwargs):
def end_output(self, linknumber=-1, **kwargs):
"""
Stores differences between expected and result in self.diff.
"""
@ -147,11 +147,11 @@ class TestLogger (linkcheck.logger._Logger):
self.diff.append(line)
def get_file_url (filename):
def get_file_url(filename):
return re.sub("^([a-zA-Z]):", r"/\1|", filename.replace("\\", "/"))
def add_fileoutput_config (config):
def add_fileoutput_config(config):
if os.name == 'posix':
devnull = '/dev/null'
elif os.name == 'nt':
@ -165,7 +165,7 @@ def add_fileoutput_config (config):
config['fileoutput'].append(logger)
def get_test_aggregate (confargs, logargs, logger=TestLogger):
def get_test_aggregate(confargs, logargs, logger=TestLogger):
"""Initialize a test configuration object."""
config = linkcheck.configuration.Configuration()
config.logger_add(logger)
@ -183,23 +183,23 @@ def get_test_aggregate (confargs, logargs, logger=TestLogger):
return linkcheck.director.get_aggregate(config)
class LinkCheckTest (unittest.TestCase):
class LinkCheckTest(unittest.TestCase):
"""
Functional test class with ability to test local files.
"""
logger = TestLogger
def setUp (self):
def setUp(self):
"""Ensure the current locale setting is the default.
Otherwise, warnings will get translated and will break tests."""
super(LinkCheckTest, self).setUp()
linkcheck.init_i18n(loc='C')
def norm (self, url, encoding="utf-8"):
def norm(self, url, encoding="utf-8"):
"""Helper function to norm a url."""
return linkcheck.url.url_norm(url, encoding=encoding)[0]
def get_attrs (self, **kwargs):
def get_attrs(self, **kwargs):
"""Return current and data directory as dictionary.
You can augment the dict with keyword attributes."""
d = {
@ -209,7 +209,7 @@ class LinkCheckTest (unittest.TestCase):
d.update(kwargs)
return d
def get_resultlines (self, filename):
def get_resultlines(self, filename):
"""
Return contents of file, as list of lines without line endings,
ignoring empty lines and lines starting with a hash sign (#).
@ -230,7 +230,7 @@ class LinkCheckTest (unittest.TestCase):
"""Get URL for given filename."""
return get_file(filename)
def file_test (self, filename, confargs=None):
def file_test(self, filename, confargs=None):
"""Check <filename> with expected result in <filename>.result."""
url = self.get_url(filename)
if confargs is None:
@ -249,7 +249,7 @@ class LinkCheckTest (unittest.TestCase):
self.fail("%d internal errors occurred!"
% logger.stats.internal_errors)
def direct (self, url, resultlines, parts=None, recursionlevel=0,
def direct(self, url, resultlines, parts=None, recursionlevel=0,
confargs=None, url_encoding=None):
"""Check url with expected result."""
assert isinstance(url, str_text), repr(url)
@ -273,18 +273,18 @@ class LinkCheckTest (unittest.TestCase):
self.fail(os.linesep.join(l))
class MailTest (LinkCheckTest):
class MailTest(LinkCheckTest):
"""Test mailto: link checking."""
def mail_valid (self, addr, **kwargs):
def mail_valid(self, addr, **kwargs):
"""Test valid mail address."""
return self.mail_test(addr, "valid", **kwargs)
def mail_error (self, addr, **kwargs):
def mail_error(self, addr, **kwargs):
"""Test error mail address."""
return self.mail_test(addr, "error", **kwargs)
def mail_test (self, addr, result, encoding="utf-8", cache_key=None, warning=None):
def mail_test(self, addr, result, encoding="utf-8", cache_key=None, warning=None):
"""Test mail address."""
url = self.norm(addr, encoding=encoding)
if cache_key is None:
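Editor's note, for illustration only (not part of the diff): a minimal functional test in the style of the classes above. The relative import matches the other test modules in this diff; the data file name is hypothetical.

from . import LinkCheckTest   # as in the other test modules in this diff

class TestExample(LinkCheckTest):
    """Hypothetical test: check example.html against example.html.result."""
    def test_example(self):
        self.file_test("example.html")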

View file

@ -26,21 +26,21 @@ from . import LinkCheckTest
TIMEOUT = 5
class FtpServerTest (LinkCheckTest):
class FtpServerTest(LinkCheckTest):
"""Start/stop an FTP server that can be used for testing."""
def __init__ (self, methodName='runTest'):
def __init__(self, methodName='runTest'):
"""Init test class and store default ftp server port."""
super(FtpServerTest, self).__init__(methodName=methodName)
self.host = 'localhost'
self.port = None
def setUp (self):
def setUp(self):
"""Start a new FTP server in a new thread."""
self.port = start_server(self.host, 0)
self.assertFalse(self.port is None)
def tearDown (self):
def tearDown(self):
"""Send stop request to server."""
try:
stop_server(self.host, self.port)
@ -48,7 +48,7 @@ class FtpServerTest (LinkCheckTest):
pass
def start_server (host, port):
def start_server(host, port):
def line_logger(self, msg):
if "kill" in msg:
raise KeyboardInterrupt()
@ -95,7 +95,7 @@ def start_server (host, port):
return port
def stop_server (host, port):
def stop_server(host, port):
"""Stop a running FTP server."""
ftp = FTP()
ftp.connect(host, port, TIMEOUT)

View file

@ -30,12 +30,12 @@ from . import LinkCheckTest
from .. import get_file
class StoppableHttpRequestHandler (SimpleHTTPRequestHandler):
class StoppableHttpRequestHandler(SimpleHTTPRequestHandler):
"""
HTTP request handler with QUIT stopping the server.
"""
def do_QUIT (self):
def do_QUIT(self):
"""
Send 200 OK response, and set server.stop to True.
"""
@ -43,7 +43,7 @@ class StoppableHttpRequestHandler (SimpleHTTPRequestHandler):
self.end_headers()
self.server.stop = True
def log_message (self, format, *args):
def log_message(self, format, *args):
"""
Logging is disabled.
"""
@ -55,12 +55,12 @@ StoppableHttpRequestHandler.extensions_map.update({
})
class StoppableHttpServer (HTTPServer):
class StoppableHttpServer(HTTPServer):
"""
HTTP server that reacts to self.stop flag.
"""
def serve_forever (self):
def serve_forever(self):
"""
Handle one request at a time until stopped.
"""
@ -69,13 +69,13 @@ class StoppableHttpServer (HTTPServer):
self.handle_request()
class NoQueryHttpRequestHandler (StoppableHttpRequestHandler):
class NoQueryHttpRequestHandler(StoppableHttpRequestHandler):
"""
Handler ignoring the query part of requests and sending dummy directory
listings.
"""
def remove_path_query (self):
def remove_path_query(self):
"""
Remove everything after a question mark.
"""
@ -90,7 +90,7 @@ class NoQueryHttpRequestHandler (StoppableHttpRequestHandler):
return status
return 500
def do_GET (self):
def do_GET(self):
"""
Removes query part of GET request.
"""
@ -104,7 +104,7 @@ class NoQueryHttpRequestHandler (StoppableHttpRequestHandler):
else:
super(NoQueryHttpRequestHandler, self).do_GET()
def do_HEAD (self):
def do_HEAD(self):
"""
Removes query part of HEAD request.
"""
@ -147,12 +147,12 @@ class NoQueryHttpRequestHandler (StoppableHttpRequestHandler):
return f
class HttpServerTest (LinkCheckTest):
class HttpServerTest(LinkCheckTest):
"""
Start/stop an HTTP server that can be used for testing.
"""
def __init__ (self, methodName='runTest'):
def __init__(self, methodName='runTest'):
"""
Init test class and store default http server port.
"""
@ -193,7 +193,7 @@ class HttpsServerTest(HttpServerTest):
return "https://localhost:%d/tests/checker/data/%s" % (self.port, filename)
def start_server (handler, https=False):
def start_server(handler, https=False):
"""Start an HTTP server thread and return its port number."""
server_address = ('localhost', 0)
handler.protocol_version = "HTTP/1.0"
@ -221,7 +221,7 @@ def start_server (handler, https=False):
return port
def stop_server (port, https=False):
def stop_server(port, https=False):
"""Stop an HTTP server thread."""
if https:
conn = HTTPSConnection("localhost:%d" % port,
@ -232,7 +232,7 @@ def stop_server (port, https=False):
conn.getresponse()
def get_cookie (maxage=2000):
def get_cookie(maxage=2000):
data = (
("Comment", "justatest"),
("Max-Age", "%d" % maxage),
@ -243,30 +243,30 @@ def get_cookie (maxage=2000):
return "; ".join('%s="%s"' % (key, value) for key, value in data)
class CookieRedirectHttpRequestHandler (NoQueryHttpRequestHandler):
class CookieRedirectHttpRequestHandler(NoQueryHttpRequestHandler):
"""Handler redirecting certain requests, and setting cookies."""
def end_headers (self):
def end_headers(self):
"""Send cookie before ending headers."""
self.send_header("Set-Cookie", get_cookie())
self.send_header("Set-Cookie", get_cookie(maxage=0))
super(CookieRedirectHttpRequestHandler, self).end_headers()
def redirect (self):
def redirect(self):
"""Redirect request."""
path = self.path.replace("redirect", "newurl")
self.send_response(302)
self.send_header("Location", path)
self.end_headers()
def redirect_newhost (self):
def redirect_newhost(self):
"""Redirect request to a new host."""
path = "http://www.example.com/"
self.send_response(302)
self.send_header("Location", path)
self.end_headers()
def redirect_newscheme (self):
def redirect_newscheme(self):
"""Redirect request to a new scheme."""
if "file" in self.path:
path = "file:README.md"
@ -276,7 +276,7 @@ class CookieRedirectHttpRequestHandler (NoQueryHttpRequestHandler):
self.send_header("Location", path)
self.end_headers()
def do_GET (self):
def do_GET(self):
"""Handle redirections for GET."""
if "redirect_newscheme" in self.path:
self.redirect_newscheme()
@ -287,7 +287,7 @@ class CookieRedirectHttpRequestHandler (NoQueryHttpRequestHandler):
else:
super(CookieRedirectHttpRequestHandler, self).do_GET()
def do_HEAD (self):
def do_HEAD(self):
"""Handle redirections for HEAD."""
if "redirect_newscheme" in self.path:
self.redirect_newscheme()

View file

@ -25,10 +25,10 @@ from . import LinkCheckTest
TIMEOUT = 5
class TelnetServerTest (LinkCheckTest):
class TelnetServerTest(LinkCheckTest):
"""Start/stop a Telnet server that can be used for testing."""
def __init__ (self, methodName='runTest'):
def __init__(self, methodName='runTest'):
"""Init test class and store default ftp server port."""
super(TelnetServerTest, self).__init__(methodName=methodName)
self.host = 'localhost'
@ -46,7 +46,7 @@ class TelnetServerTest (LinkCheckTest):
netloc = self.host
return "telnet://%s:%d" % (netloc, self.port)
def setUp (self):
def setUp(self):
"""Start a new Telnet server in a new thread."""
self.port, self.server_thread = start_server(self.host, 0, self.stop_event)
self.assertFalse(self.port is None)
@ -59,7 +59,7 @@ class TelnetServerTest (LinkCheckTest):
assert not self.server_thread.is_alive()
def start_server (host, port, stop_event):
def start_server(host, port, stop_event):
# Instantiate Telnet server class and listen to host:port
clients = []
def on_connect(client):

View file

@ -19,12 +19,12 @@ Test html anchor parsing and checking.
from . import LinkCheckTest
class TestAnchor (LinkCheckTest):
class TestAnchor(LinkCheckTest):
"""
Test anchor checking of HTML pages.
"""
def test_anchor (self):
def test_anchor(self):
confargs = {"enabledplugins": ["AnchorCheck"]}
url = "file://%(curdir)s/%(datadir)s/anchor.html" % self.get_attrs()
nurl = self.norm(url)

View file

@ -19,19 +19,19 @@ Test html <base> tag parsing.
from . import LinkCheckTest
class TestBase (LinkCheckTest):
class TestBase(LinkCheckTest):
"""
Test links of base*.html files.
"""
def test_base1 (self):
def test_base1(self):
self.file_test("base1.html")
def test_base2 (self):
def test_base2(self):
self.file_test("base2.html")
def test_base3 (self):
def test_base3(self):
self.file_test("base3.html")
def test_base4 (self):
def test_base4(self):
self.file_test("base4.html")

View file

@ -21,33 +21,33 @@ from .. import need_network, need_biplist
import os
class TestBookmarks (LinkCheckTest):
class TestBookmarks(LinkCheckTest):
"""
Test bookmark link checking and content parsing.
"""
@need_network
def test_firefox_bookmarks (self):
def test_firefox_bookmarks(self):
# firefox 3 bookmark file parsing
self.file_test("places.sqlite")
@need_network
def test_opera_bookmarks (self):
def test_opera_bookmarks(self):
# Opera bookmark file parsing
self.file_test("opera6.adr")
@need_network
def test_chromium_bookmarks (self):
def test_chromium_bookmarks(self):
# Chromium and Google Chrome bookmark file parsing
self.file_test("Bookmarks")
@need_network
def test_safari_bookmarks_xml (self):
def test_safari_bookmarks_xml(self):
# Safari bookmark file parsing (for plaintext plist files)
self.file_test(os.path.join("plist_xml", "Bookmarks.plist"))
@need_network
@need_biplist
def test_safari_bookmarks_binary (self):
def test_safari_bookmarks_binary(self):
# Safari bookmark file parsing (for binary plist files)
self.file_test(os.path.join("plist_binary", "Bookmarks.plist"))

View file

@ -19,7 +19,7 @@ Test html <base> tag parsing.
from . import LinkCheckTest
class TestBase (LinkCheckTest):
class TestBase(LinkCheckTest):
"""
Test whether charset encoding is done right.
The linkchecker should translate the encoding

View file

@ -20,12 +20,12 @@ Test error checking.
from . import LinkCheckTest
class TestError (LinkCheckTest):
class TestError(LinkCheckTest):
"""
Test unrecognized or syntactically wrong links.
"""
def test_unrecognized (self):
def test_unrecognized(self):
# Unrecognized scheme
url = "hutzli:"
attrs = self.get_attrs(url=url)
@ -38,7 +38,7 @@ class TestError (LinkCheckTest):
]
self.direct(url, resultlines)
def test_invalid1 (self):
def test_invalid1(self):
# invalid scheme chars
url = "äöü:"
attrs = self.get_attrs(url=url)
@ -52,7 +52,7 @@ class TestError (LinkCheckTest):
]
self.direct(url, resultlines)
def test_invalid2 (self):
def test_invalid2(self):
# missing scheme altogether
url = "äöü"
attrs = self.get_attrs(url=url)
@ -66,7 +66,7 @@ class TestError (LinkCheckTest):
]
self.direct(url, resultlines)
def test_invalid3 (self):
def test_invalid3(self):
# severely malformed URL
url = "@³²¼][½ ³@] ¬½"
attrs = self.get_attrs(url=url)

View file

@ -26,7 +26,7 @@ from tests import need_word, need_pdflib
from . import LinkCheckTest, get_file
def unzip (filename, targetdir):
def unzip(filename, targetdir):
"""Unzip given zipfile into targetdir."""
if isinstance(targetdir, unicode):
targetdir = str(targetdir)
@ -42,34 +42,34 @@ def unzip (filename, targetdir):
outfile.close()
class TestFile (LinkCheckTest):
class TestFile(LinkCheckTest):
"""
Test file:// link checking (and file content parsing).
"""
def test_html (self):
def test_html(self):
self.file_test("file.html")
def test_html_url_quote (self):
def test_html_url_quote(self):
self.file_test("file_url_quote.html")
def test_wml (self):
def test_wml(self):
self.file_test("file.wml")
def test_text (self):
def test_text(self):
self.file_test("file.txt")
def test_asc (self):
def test_asc(self):
self.file_test("file.asc")
def test_css (self):
def test_css(self):
self.file_test("file.css")
def test_php (self):
def test_php(self):
self.file_test("file.php")
@need_word
def test_word (self):
def test_word(self):
confargs = dict(enabledplugins=["WordParser"])
self.file_test("file.doc", confargs=confargs)
@ -82,11 +82,11 @@ class TestFile (LinkCheckTest):
confargs = dict(enabledplugins=["MarkdownCheck"])
self.file_test("file.markdown", confargs=confargs)
def test_urllist (self):
def test_urllist(self):
self.file_test("urllist.txt")
@pytest.mark.xfail
def test_directory_listing (self):
def test_directory_listing(self):
# unpack non-unicode filename which cannot be stored
# in the SF subversion repository
if os.name != 'posix' or sys.platform != 'linux2':
@ -96,11 +96,11 @@ class TestFile (LinkCheckTest):
unzip(dirname + ".zip", os.path.dirname(dirname))
self.file_test("dir")
def test_unicode_filename (self):
def test_unicode_filename(self):
# a unicode filename
self.file_test("Мошкова.bin")
def test_good_file (self):
def test_good_file(self):
url = "file://%(curdir)s/%(datadir)s/file.txt" % self.get_attrs()
nurl = self.norm(url)
resultlines = [
@ -111,7 +111,7 @@ class TestFile (LinkCheckTest):
]
self.direct(url, resultlines)
def test_bad_file (self):
def test_bad_file(self):
if os.name == 'nt':
# Fails on NT platforms and I am too lazy to fix
# Cause: url gets quoted %7C which gets lowercased to
@ -127,7 +127,7 @@ class TestFile (LinkCheckTest):
]
self.direct(url, resultlines)
def test_good_file_missing_dslash (self):
def test_good_file_missing_dslash(self):
# good file (missing double slash)
attrs = self.get_attrs()
url = "file:%(curdir)s/%(datadir)s/file.txt" % attrs
@ -139,7 +139,7 @@ class TestFile (LinkCheckTest):
]
self.direct(url, resultlines)
def test_good_dir (self):
def test_good_dir(self):
url = "file://%(curdir)s/%(datadir)s/" % self.get_attrs()
resultlines = [
"url %s" % url,
@ -149,7 +149,7 @@ class TestFile (LinkCheckTest):
]
self.direct(url, resultlines)
def test_good_dir_space (self):
def test_good_dir_space(self):
url = "file://%(curdir)s/%(datadir)s/a b/" % self.get_attrs()
nurl = self.norm(url)
url2 = "file://%(curdir)s/%(datadir)s/a b/el.html" % self.get_attrs()

View file

@ -19,10 +19,10 @@ Test html <frame> tag parsing.
from . import LinkCheckTest
class TestFrames (LinkCheckTest):
class TestFrames(LinkCheckTest):
"""
Test link checking of HTML framesets.
"""
def test_frames (self):
def test_frames(self):
self.file_test("frames.html")

View file

@ -20,11 +20,11 @@ from .. import need_pyftpdlib
from .ftpserver import FtpServerTest
class TestFtp (FtpServerTest):
class TestFtp(FtpServerTest):
"""Test ftp: link checking."""
@need_pyftpdlib
def test_ftp (self):
def test_ftp(self):
# ftp two slashes
url = "ftp://%s:%d/" % (self.host, self.port)
resultlines = [

View file

@ -22,7 +22,7 @@ import pytest
from tests import need_network
from .httpserver import HttpServerTest, CookieRedirectHttpRequestHandler
class TestHttp (HttpServerTest):
class TestHttp(HttpServerTest):
"""Test http:// link checking."""
def __init__(self, methodName='runTest'):
@ -30,7 +30,7 @@ class TestHttp (HttpServerTest):
self.handler = CookieRedirectHttpRequestHandler
@need_network
def test_html (self):
def test_html(self):
confargs = dict(recursionlevel=1)
self.file_test("http.html", confargs=confargs)
self.file_test("http_lowercase.html", confargs=confargs)

View file

@ -19,15 +19,15 @@ Test http checking.
from .httpserver import HttpServerTest
from tests import need_network
class TestHttpMisc (HttpServerTest):
class TestHttpMisc(HttpServerTest):
"""Test http:// misc link checking."""
@need_network
def test_html (self):
def test_html(self):
self.swf_test()
self.file_test("sitemap.xml")
def swf_test (self):
def swf_test(self):
url = self.get_url("test.swf")
resultlines = [
"url %s" % url,

View file

@ -19,7 +19,7 @@ Test http checking.
from tests import need_network
from .httpserver import HttpServerTest, CookieRedirectHttpRequestHandler
class TestHttpRedirect (HttpServerTest):
class TestHttpRedirect(HttpServerTest):
"""Test http:// link redirection checking."""
def __init__(self, methodName='runTest'):
@ -27,14 +27,14 @@ class TestHttpRedirect (HttpServerTest):
self.handler = CookieRedirectHttpRequestHandler
@need_network
def test_redirect (self):
def test_redirect(self):
self.redirect1()
self.redirect2()
self.redirect3()
self.redirect4()
self.redirect5()
def redirect1 (self):
def redirect1(self):
url = "http://localhost:%d/redirect1" % self.port
nurl = url
rurl = url.replace("redirect", "newurl")
@ -47,7 +47,7 @@ class TestHttpRedirect (HttpServerTest):
]
self.direct(url, resultlines, recursionlevel=0)
def redirect2 (self):
def redirect2(self):
url = "http://localhost:%d/tests/checker/data/redirect.html" % \
self.port
nurl = url
@ -61,12 +61,12 @@ class TestHttpRedirect (HttpServerTest):
]
self.direct(url, resultlines, recursionlevel=99)
def redirect3 (self):
def redirect3(self):
url = "http://localhost:%d/tests/checker/data/redir.html" % self.port
resultlines = self.get_resultlines("redir.html")
self.direct(url, resultlines, recursionlevel=1)
def redirect4 (self):
def redirect4(self):
url = "http://localhost:%d/redirect_newscheme_ftp" % self.port
nurl = url
#rurl = "ftp://example.com/"
@ -84,7 +84,7 @@ class TestHttpRedirect (HttpServerTest):
]
self.direct(url, resultlines, recursionlevel=99)
def redirect5 (self):
def redirect5(self):
url = "http://localhost:%d/redirect_newscheme_file" % self.port
nurl = url
#rurl = "file:README"

View file

@ -18,14 +18,14 @@ Test http checking.
"""
from .httpserver import HttpServerTest
class TestHttpRobots (HttpServerTest):
class TestHttpRobots(HttpServerTest):
"""Test robots.txt link checking behaviour."""
def test_html (self):
def test_html(self):
self.robots_txt_test()
self.robots_txt2_test()
def robots_txt_test (self):
def robots_txt_test(self):
url = "http://localhost:%d/robots.txt" % self.port
resultlines = [
"url %s" % url,
@ -35,7 +35,7 @@ class TestHttpRobots (HttpServerTest):
]
self.direct(url, resultlines, recursionlevel=5)
def robots_txt2_test (self):
def robots_txt2_test(self):
url = "http://localhost:%d/secret" % self.port
resultlines = [
"url %s" % url,

View file

@ -52,7 +52,7 @@ class TestHttps(HttpsServerTest):
f.write(crypto.dump_certificate(crypto.FILETYPE_PEM, cert))
@need_network
def test_https (self):
def test_https(self):
url = self.get_url("")
resultlines = [
"url %s" % url,
