mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-30 02:54:42 +00:00
commit
1e35530f9d
135 changed files with 1017 additions and 1016 deletions
|
|
@ -23,7 +23,7 @@ import sys
|
|||
from xml.etree.ElementTree import parse
|
||||
|
||||
|
||||
def main (args):
|
||||
def main(args):
|
||||
filename = args[0]
|
||||
with open(filename) as fd:
|
||||
tree = parse(fd)
|
||||
|
|
|
|||
|
|
@ -48,12 +48,12 @@ from .logconf import (
|
|||
import _LinkChecker_configdata as configdata
|
||||
|
||||
|
||||
def module_path ():
|
||||
def module_path():
|
||||
"""Return absolute directory of system executable."""
|
||||
return os.path.dirname(os.path.abspath(sys.executable))
|
||||
|
||||
|
||||
def get_install_data ():
|
||||
def get_install_data():
|
||||
"""Return absolute path of LinkChecker data installation directory."""
|
||||
from .loader import is_frozen
|
||||
if is_frozen():
|
||||
|
|
@ -70,7 +70,7 @@ class LinkCheckerInterrupt(Exception):
|
|||
pass
|
||||
|
||||
|
||||
def get_link_pat (arg, strict=False):
|
||||
def get_link_pat(arg, strict=False):
|
||||
"""Get a link pattern matcher for intern/extern links.
|
||||
Returns a compiled pattern and a negate and strict option.
|
||||
|
||||
|
|
@ -101,7 +101,7 @@ def get_link_pat (arg, strict=False):
|
|||
}
|
||||
|
||||
|
||||
def init_i18n (loc=None):
|
||||
def init_i18n(loc=None):
|
||||
"""Initialize i18n with the configured locale dir. The environment
|
||||
variable LOCPATH can also specify a locale dir.
|
||||
|
||||
|
|
@ -127,7 +127,7 @@ def init_i18n (loc=None):
|
|||
init_i18n()
|
||||
|
||||
|
||||
def drop_privileges ():
|
||||
def drop_privileges():
|
||||
"""Make sure to drop root privileges on POSIX systems."""
|
||||
if os.name != 'posix':
|
||||
return
|
||||
|
|
|
|||
|
|
@ -163,7 +163,7 @@ if os.name == 'nt':
|
|||
Beep = "\007"
|
||||
|
||||
|
||||
def esc_ansicolor (color):
|
||||
def esc_ansicolor(color):
|
||||
"""convert a named color definition to an escaped ANSI color"""
|
||||
control = ''
|
||||
if ";" in color:
|
||||
|
|
@ -191,7 +191,7 @@ def get_win_color(color):
|
|||
return foreground, background, style
|
||||
|
||||
|
||||
def has_colors (fp):
|
||||
def has_colors(fp):
|
||||
"""Test if given file is an ANSI color enabled tty."""
|
||||
# The is_tty() function ensures that we do not colorize
|
||||
# redirected streams, as this is almost never what we want
|
||||
|
|
@ -210,7 +210,7 @@ def has_colors (fp):
|
|||
return False
|
||||
|
||||
|
||||
def get_columns (fp):
|
||||
def get_columns(fp):
|
||||
"""Return number of columns for given file."""
|
||||
if not is_tty(fp):
|
||||
return 80
|
||||
|
|
@ -226,7 +226,7 @@ def get_columns (fp):
|
|||
return 80
|
||||
|
||||
|
||||
def _write_color_colorama (fp, text, color):
|
||||
def _write_color_colorama(fp, text, color):
|
||||
"""Colorize text with given color."""
|
||||
foreground, background, style = get_win_color(color)
|
||||
colorama.set_console(foreground=foreground, background=background,
|
||||
|
|
@ -235,7 +235,7 @@ def _write_color_colorama (fp, text, color):
|
|||
colorama.reset_console()
|
||||
|
||||
|
||||
def _write_color_ansi (fp, text, color):
|
||||
def _write_color_ansi(fp, text, color):
|
||||
"""Colorize text with given color."""
|
||||
fp.write(esc_ansicolor(color))
|
||||
fp.write(text)
|
||||
|
|
@ -252,7 +252,7 @@ else:
|
|||
class Colorizer:
|
||||
"""Prints colored messages to streams."""
|
||||
|
||||
def __init__ (self, fp):
|
||||
def __init__(self, fp):
|
||||
"""Initialize with given stream (file-like object)."""
|
||||
self.fp = fp
|
||||
if has_colors(fp):
|
||||
|
|
@ -260,26 +260,26 @@ class Colorizer:
|
|||
else:
|
||||
self.write = self._write
|
||||
|
||||
def _write (self, text, color=None):
|
||||
def _write(self, text, color=None):
|
||||
"""Print text as-is."""
|
||||
self.fp.write(text)
|
||||
|
||||
def _write_color (self, text, color=None):
|
||||
def _write_color(self, text, color=None):
|
||||
"""Print text with given color. If color is None, print text as-is."""
|
||||
if color is None:
|
||||
self.fp.write(text)
|
||||
else:
|
||||
write_color(self.fp, text, color)
|
||||
|
||||
def __getattr__ (self, name):
|
||||
def __getattr__(self, name):
|
||||
"""Delegate attribute access to the stored stream object."""
|
||||
return getattr(self.fp, name)
|
||||
|
||||
|
||||
class ColoredStreamHandler (logging.StreamHandler):
|
||||
class ColoredStreamHandler(logging.StreamHandler):
|
||||
"""Send colored log messages to streams (file-like objects)."""
|
||||
|
||||
def __init__ (self, strm=None):
|
||||
def __init__(self, strm=None):
|
||||
"""Log to given stream (a file-like object) or to stderr if
|
||||
strm is None.
|
||||
"""
|
||||
|
|
@ -293,12 +293,12 @@ class ColoredStreamHandler (logging.StreamHandler):
|
|||
logging.DEBUG: 'white',
|
||||
}
|
||||
|
||||
def get_color (self, record):
|
||||
def get_color(self, record):
|
||||
"""Get appropriate color according to log level.
|
||||
"""
|
||||
return self.colors.get(record.levelno, 'default')
|
||||
|
||||
def emit (self, record):
|
||||
def emit(self, record):
|
||||
"""Emit a record.
|
||||
|
||||
If a formatter is specified, it is used to format the record.
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ from xdg import xdg_config_home
|
|||
nt_filename_encoding="mbcs"
|
||||
|
||||
|
||||
def get_profile_dir ():
|
||||
def get_profile_dir():
|
||||
"""Return path where all profiles of current user are stored."""
|
||||
if os.name == 'nt':
|
||||
if "LOCALAPPDATA" in os.environ:
|
||||
|
|
@ -46,7 +46,7 @@ def get_profile_dir ():
|
|||
return dirpath
|
||||
|
||||
|
||||
def find_bookmark_file (profile="Default"):
|
||||
def find_bookmark_file(profile="Default"):
|
||||
"""Return the bookmark file of the Default profile.
|
||||
Returns absolute filename if found, or empty string if no bookmark file
|
||||
could be found.
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ from xdg.BaseDirectory import xdg_config_home
|
|||
nt_filename_encoding="mbcs"
|
||||
|
||||
|
||||
def get_profile_dir ():
|
||||
def get_profile_dir():
|
||||
"""Return path where all profiles of current user are stored."""
|
||||
if os.name == 'nt':
|
||||
if "LOCALAPPDATA" in os.environ:
|
||||
|
|
@ -48,7 +48,7 @@ def get_profile_dir ():
|
|||
return dirpath
|
||||
|
||||
|
||||
def find_bookmark_file (profile="Default"):
|
||||
def find_bookmark_file(profile="Default"):
|
||||
"""Return the bookmark file of the Default profile.
|
||||
Returns absolute filename if found, or empty string if no bookmark file
|
||||
could be found.
|
||||
|
|
@ -64,7 +64,7 @@ def find_bookmark_file (profile="Default"):
|
|||
return ""
|
||||
|
||||
|
||||
def parse_bookmark_data (data):
|
||||
def parse_bookmark_data(data):
|
||||
"""Parse data string.
|
||||
Return iterator for bookmarks of the form (url, name).
|
||||
Bookmarks are not sorted.
|
||||
|
|
@ -73,7 +73,7 @@ def parse_bookmark_data (data):
|
|||
yield url, name
|
||||
|
||||
|
||||
def parse_bookmark_file (file):
|
||||
def parse_bookmark_file(file):
|
||||
"""Parse file object.
|
||||
Return iterator for bookmarks of the form (url, name).
|
||||
Bookmarks are not sorted.
|
||||
|
|
@ -82,14 +82,14 @@ def parse_bookmark_file (file):
|
|||
yield url, name
|
||||
|
||||
|
||||
def parse_bookmark_json (data):
|
||||
def parse_bookmark_json(data):
|
||||
"""Parse complete JSON data for Chromium Bookmarks."""
|
||||
for entry in data["roots"].values():
|
||||
for url, name in parse_bookmark_node(entry):
|
||||
yield url, name
|
||||
|
||||
|
||||
def parse_bookmark_node (node):
|
||||
def parse_bookmark_node(node):
|
||||
"""Parse one JSON node of Chromium Bookmarks."""
|
||||
if node["type"] == "url":
|
||||
yield node["url"], node["name"]
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ extension = re.compile(r'/places.sqlite$', re.IGNORECASE)
|
|||
# Windows filename encoding
|
||||
nt_filename_encoding="mbcs"
|
||||
|
||||
def get_profile_dir ():
|
||||
def get_profile_dir():
|
||||
"""Return path where all profiles of current user are stored."""
|
||||
if os.name == 'nt':
|
||||
basedir = unicode(os.environ["APPDATA"], nt_filename_encoding)
|
||||
|
|
@ -40,7 +40,7 @@ def get_profile_dir ():
|
|||
return dirpath
|
||||
|
||||
|
||||
def find_bookmark_file (profile="*.default"):
|
||||
def find_bookmark_file(profile="*.default"):
|
||||
"""Return the first found places.sqlite file of the profile directories
|
||||
ending with '.default' (or another given profile name).
|
||||
Returns absolute filename if found, or empty string if no bookmark file
|
||||
|
|
@ -57,7 +57,7 @@ def find_bookmark_file (profile="*.default"):
|
|||
return ""
|
||||
|
||||
|
||||
def parse_bookmark_file (filename):
|
||||
def parse_bookmark_file(filename):
|
||||
"""Return iterator for bookmarks of the form (url, name).
|
||||
Bookmarks are not sorted.
|
||||
Returns None if sqlite3 module is not installed.
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ OperaBookmarkFiles = (
|
|||
)
|
||||
|
||||
|
||||
def get_profile_dir ():
|
||||
def get_profile_dir():
|
||||
"""Return path where all profiles of current user are stored."""
|
||||
if os.name == 'nt':
|
||||
basedir = unicode(os.environ["APPDATA"], nt_filename_encoding)
|
||||
|
|
@ -35,7 +35,7 @@ def get_profile_dir ():
|
|||
return dirpath
|
||||
|
||||
|
||||
def find_bookmark_file ():
|
||||
def find_bookmark_file():
|
||||
"""Return the bookmark file of the Opera profile.
|
||||
Returns absolute filename if found, or empty string if no bookmark file
|
||||
could be found.
|
||||
|
|
@ -52,7 +52,7 @@ def find_bookmark_file ():
|
|||
return ""
|
||||
|
||||
|
||||
def parse_bookmark_data (data):
|
||||
def parse_bookmark_data(data):
|
||||
"""Return iterator for bookmarks of the form (url, name, line number).
|
||||
Bookmarks are not sorted.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -24,12 +24,12 @@ except ImportError:
|
|||
has_biplist = False
|
||||
|
||||
|
||||
def get_profile_dir ():
|
||||
def get_profile_dir():
|
||||
"""Return path where all profiles of current user are stored."""
|
||||
return os.path.join(os.environ["HOME"], "Library", "Safari")
|
||||
|
||||
|
||||
def find_bookmark_file ():
|
||||
def find_bookmark_file():
|
||||
"""Return the bookmark file of the Default profile.
|
||||
Returns absolute filename if found, or empty string if no bookmark file
|
||||
could be found.
|
||||
|
|
@ -47,21 +47,21 @@ def find_bookmark_file ():
|
|||
return ""
|
||||
|
||||
|
||||
def parse_bookmark_file (filename):
|
||||
def parse_bookmark_file(filename):
|
||||
"""Return iterator for bookmarks of the form (url, name).
|
||||
Bookmarks are not sorted.
|
||||
"""
|
||||
return parse_plist(get_plist_data_from_file(filename))
|
||||
|
||||
|
||||
def parse_bookmark_data (data):
|
||||
def parse_bookmark_data(data):
|
||||
"""Return iterator for bookmarks of the form (url, name).
|
||||
Bookmarks are not sorted.
|
||||
"""
|
||||
return parse_plist(get_plist_data_from_string(data))
|
||||
|
||||
|
||||
def get_plist_data_from_file (filename):
|
||||
def get_plist_data_from_file(filename):
|
||||
"""Parse plist data for a file. Tries biplist, falling back to
|
||||
plistlib."""
|
||||
if has_biplist:
|
||||
|
|
@ -74,7 +74,7 @@ def get_plist_data_from_file (filename):
|
|||
return {}
|
||||
|
||||
|
||||
def get_plist_data_from_string (data):
|
||||
def get_plist_data_from_string(data):
|
||||
"""Parse plist data for a string. Tries biplist, falling back to
|
||||
plistlib."""
|
||||
if has_biplist:
|
||||
|
|
@ -105,11 +105,11 @@ def parse_plist(entry):
|
|||
yield item
|
||||
|
||||
|
||||
def is_leaf (entry):
|
||||
def is_leaf(entry):
|
||||
"""Return true if plist entry is an URL entry."""
|
||||
return entry.get(KEY_WEBBOOKMARKTYPE) == 'WebBookmarkTypeLeaf'
|
||||
|
||||
|
||||
def has_children (entry):
|
||||
def has_children(entry):
|
||||
"""Return true if plist entry has children."""
|
||||
return entry.get(KEY_WEBBOOKMARKTYPE) == 'WebBookmarkTypeList'
|
||||
|
|
|
|||
6
linkcheck/cache/robots_txt.py
vendored
6
linkcheck/cache/robots_txt.py
vendored
|
|
@ -33,7 +33,7 @@ class RobotsTxt:
|
|||
format: {cache key (string) -> robots.txt content (RobotFileParser)}
|
||||
"""
|
||||
|
||||
def __init__ (self, useragent):
|
||||
def __init__(self, useragent):
|
||||
"""Initialize per-URL robots.txt cache."""
|
||||
# mapping {URL -> parsed robots.txt}
|
||||
self.cache = LFUCache(size=100)
|
||||
|
|
@ -41,13 +41,13 @@ class RobotsTxt:
|
|||
self.roboturl_locks = {}
|
||||
self.useragent = useragent
|
||||
|
||||
def allows_url (self, url_data):
|
||||
def allows_url(self, url_data):
|
||||
"""Ask robots.txt allowance."""
|
||||
roboturl = url_data.get_robots_txt_url()
|
||||
with self.get_lock(roboturl):
|
||||
return self._allows_url(url_data, roboturl)
|
||||
|
||||
def _allows_url (self, url_data, roboturl):
|
||||
def _allows_url(self, url_data, roboturl):
|
||||
"""Ask robots.txt allowance. Assumes only single thread per robots.txt
|
||||
URL calls this function."""
|
||||
with cache_lock:
|
||||
|
|
|
|||
24
linkcheck/cache/urlqueue.py
vendored
24
linkcheck/cache/urlqueue.py
vendored
|
|
@ -37,7 +37,7 @@ class UrlQueue:
|
|||
"""A queue supporting several consumer tasks. The task_done() idea is
|
||||
from the Python 2.5 implementation of Queue.Queue()."""
|
||||
|
||||
def __init__ (self, max_allowed_urls=None):
|
||||
def __init__(self, max_allowed_urls=None):
|
||||
"""Initialize the queue state and task counters."""
|
||||
# Note: don't put a maximum size on the queue since it would
|
||||
# lead to deadlocks when all worker threads called put().
|
||||
|
|
@ -62,31 +62,31 @@ class UrlQueue:
|
|||
self.max_allowed_urls = max_allowed_urls
|
||||
self.num_puts = 0
|
||||
|
||||
def qsize (self):
|
||||
def qsize(self):
|
||||
"""Return the approximate size of the queue (not reliable!)."""
|
||||
with self.mutex:
|
||||
return len(self.queue)
|
||||
|
||||
def empty (self):
|
||||
def empty(self):
|
||||
"""Return True if the queue is empty, False otherwise.
|
||||
Result is thread-safe, but not reliable since the queue could have
|
||||
been changed before the result is returned!"""
|
||||
with self.mutex:
|
||||
return self._empty()
|
||||
|
||||
def _empty (self):
|
||||
def _empty(self):
|
||||
"""Return True if the queue is empty, False otherwise.
|
||||
Not thread-safe!"""
|
||||
return not self.queue
|
||||
|
||||
def get (self, timeout=None):
|
||||
def get(self, timeout=None):
|
||||
"""Get first not-in-progress url from the queue and
|
||||
return it. If no such url is available return None.
|
||||
"""
|
||||
with self.not_empty:
|
||||
return self._get(timeout)
|
||||
|
||||
def _get (self, timeout):
|
||||
def _get(self, timeout):
|
||||
"""Non thread-safe utility function of self.get() doing the real
|
||||
work."""
|
||||
if timeout is None:
|
||||
|
|
@ -104,7 +104,7 @@ class UrlQueue:
|
|||
self.in_progress += 1
|
||||
return self.queue.popleft()
|
||||
|
||||
def put (self, item):
|
||||
def put(self, item):
|
||||
"""Put an item into the queue.
|
||||
Block if necessary until a free slot is available.
|
||||
"""
|
||||
|
|
@ -112,7 +112,7 @@ class UrlQueue:
|
|||
self._put(item)
|
||||
self.not_empty.notify()
|
||||
|
||||
def _put (self, url_data):
|
||||
def _put(self, url_data):
|
||||
"""Put URL in queue, increase number of unfinished tasks."""
|
||||
if self.shutdown or self.max_allowed_urls == 0:
|
||||
return
|
||||
|
|
@ -154,7 +154,7 @@ class UrlQueue:
|
|||
self.queue.rotate(pos)
|
||||
self.queue.appendleft(item)
|
||||
|
||||
def task_done (self, url_data):
|
||||
def task_done(self, url_data):
|
||||
"""
|
||||
Indicate that a formerly enqueued task is complete.
|
||||
|
||||
|
|
@ -179,7 +179,7 @@ class UrlQueue:
|
|||
raise ValueError('task_done() called too many times')
|
||||
self.all_tasks_done.notifyAll()
|
||||
|
||||
def join (self, timeout=None):
|
||||
def join(self, timeout=None):
|
||||
"""Blocks until all items in the Queue have been gotten and processed.
|
||||
|
||||
The count of unfinished tasks goes up whenever an item is added to the
|
||||
|
|
@ -202,7 +202,7 @@ class UrlQueue:
|
|||
raise Timeout()
|
||||
self.all_tasks_done.wait(remaining)
|
||||
|
||||
def do_shutdown (self):
|
||||
def do_shutdown(self):
|
||||
"""Shutdown the queue by not accepting any more URLs."""
|
||||
with self.mutex:
|
||||
unfinished = self.unfinished_tasks - len(self.queue)
|
||||
|
|
@ -214,7 +214,7 @@ class UrlQueue:
|
|||
self.unfinished_tasks = unfinished
|
||||
self.shutdown = True
|
||||
|
||||
def status (self):
|
||||
def status(self):
|
||||
"""Get tuple (finished tasks, in progress, queue size)."""
|
||||
# no need to acquire self.mutex since the numbers are unreliable anyways.
|
||||
return (self.finished_tasks, self.in_progress, len(self.queue))
|
||||
|
|
|
|||
|
|
@ -43,7 +43,7 @@ def guess_url(url):
|
|||
return url
|
||||
|
||||
|
||||
def absolute_url (base_url, base_ref, parent_url):
|
||||
def absolute_url(base_url, base_ref, parent_url):
|
||||
"""
|
||||
Search for the absolute url to detect the link type. This does not
|
||||
join any url fragments together!
|
||||
|
|
@ -64,7 +64,7 @@ def absolute_url (base_url, base_ref, parent_url):
|
|||
return ""
|
||||
|
||||
|
||||
def get_url_from (base_url, recursion_level, aggregate,
|
||||
def get_url_from(base_url, recursion_level, aggregate,
|
||||
parent_url=None, base_ref=None, line=None, column=None,
|
||||
page=0, name="", parent_content_type=None, extern=None, url_encoding=None):
|
||||
"""
|
||||
|
|
@ -125,7 +125,7 @@ def get_url_from (base_url, recursion_level, aggregate,
|
|||
line=line, column=column, page=page, name=name, extern=extern, url_encoding=url_encoding)
|
||||
|
||||
|
||||
def get_urlclass_from (scheme, assume_local_file=False):
|
||||
def get_urlclass_from(scheme, assume_local_file=False):
|
||||
"""Return checker class for given URL scheme. If the scheme
|
||||
cannot be matched and assume_local_file is True, assume a local file.
|
||||
"""
|
||||
|
|
@ -154,7 +154,7 @@ def get_urlclass_from (scheme, assume_local_file=False):
|
|||
return klass
|
||||
|
||||
|
||||
def get_index_html (urls):
|
||||
def get_index_html(urls):
|
||||
"""
|
||||
Construct artificial index.html from given URLs.
|
||||
|
||||
|
|
|
|||
|
|
@ -22,12 +22,12 @@ import socket
|
|||
from . import urlbase
|
||||
|
||||
|
||||
class DnsUrl (urlbase.UrlBase):
|
||||
class DnsUrl(urlbase.UrlBase):
|
||||
"""
|
||||
Url link with dns scheme.
|
||||
"""
|
||||
|
||||
def can_get_content (self):
|
||||
def can_get_content(self):
|
||||
"""
|
||||
dns: URLs do not have any content
|
||||
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ from ..bookmarks import firefox
|
|||
from .const import WARN_FILE_MISSING_SLASH, WARN_FILE_SYSTEM_PATH
|
||||
|
||||
|
||||
def get_files (dirname):
|
||||
def get_files(dirname):
|
||||
"""Get iterator of entries in directory. Only allows regular files
|
||||
and directories, no symlinks."""
|
||||
for entry in os.listdir(dirname):
|
||||
|
|
@ -43,7 +43,7 @@ def get_files (dirname):
|
|||
yield entry+"/"
|
||||
|
||||
|
||||
def prepare_urlpath_for_nt (path):
|
||||
def prepare_urlpath_for_nt(path):
|
||||
"""
|
||||
URLs like 'file://server/path/' result in a path named '/server/path'.
|
||||
However urllib.url2pathname expects '////server/path'.
|
||||
|
|
@ -53,7 +53,7 @@ def prepare_urlpath_for_nt (path):
|
|||
return path
|
||||
|
||||
|
||||
def get_nt_filename (path):
|
||||
def get_nt_filename(path):
|
||||
"""Return case sensitive filename for NT path."""
|
||||
unc, rest = os.path.splitunc(path)
|
||||
head, tail = os.path.split(rest)
|
||||
|
|
@ -66,7 +66,7 @@ def get_nt_filename (path):
|
|||
return path
|
||||
|
||||
|
||||
def get_os_filename (path):
|
||||
def get_os_filename(path):
|
||||
"""Return filesystem path for given URL path."""
|
||||
if os.name == 'nt':
|
||||
path = prepare_urlpath_for_nt(path)
|
||||
|
|
@ -77,7 +77,7 @@ def get_os_filename (path):
|
|||
return res
|
||||
|
||||
|
||||
def is_absolute_path (path):
|
||||
def is_absolute_path(path):
|
||||
"""Check if given path is absolute. On Windows absolute paths start
|
||||
with a drive letter. On all other systems absolute paths start with
|
||||
a slash."""
|
||||
|
|
@ -88,12 +88,12 @@ def is_absolute_path (path):
|
|||
return path.startswith("/")
|
||||
|
||||
|
||||
class FileUrl (urlbase.UrlBase):
|
||||
class FileUrl(urlbase.UrlBase):
|
||||
"""
|
||||
Url link with file scheme.
|
||||
"""
|
||||
|
||||
def init (self, base_ref, base_url, parent_url, recursion_level,
|
||||
def init(self, base_ref, base_url, parent_url, recursion_level,
|
||||
aggregate, line, column, page, name, url_encoding, extern):
|
||||
"""Initialize the scheme."""
|
||||
super(FileUrl, self).init(base_ref, base_url, parent_url,
|
||||
|
|
@ -128,7 +128,7 @@ class FileUrl (urlbase.UrlBase):
|
|||
base_url = re.sub("^file://([^/])", r"file:///\1", base_url)
|
||||
self.base_url = base_url
|
||||
|
||||
def build_url (self):
|
||||
def build_url(self):
|
||||
"""
|
||||
Calls super.build_url() and adds a trailing slash to directories.
|
||||
"""
|
||||
|
|
@ -154,7 +154,7 @@ class FileUrl (urlbase.UrlBase):
|
|||
self.urlparts[2] += '/'
|
||||
self.url = urlutil.urlunsplit(self.urlparts)
|
||||
|
||||
def add_size_info (self):
|
||||
def add_size_info(self):
|
||||
"""Get size of file content and modification time from filename path."""
|
||||
if self.is_directory():
|
||||
# Directory size always differs from the customer index.html
|
||||
|
|
@ -164,7 +164,7 @@ class FileUrl (urlbase.UrlBase):
|
|||
self.size = fileutil.get_size(filename)
|
||||
self.modified = datetime.utcfromtimestamp(fileutil.get_mtime(filename))
|
||||
|
||||
def check_connection (self):
|
||||
def check_connection(self):
|
||||
"""
|
||||
Try to open the local file. Under NT systems the case sensitivity
|
||||
is checked.
|
||||
|
|
@ -180,7 +180,7 @@ class FileUrl (urlbase.UrlBase):
|
|||
self.url_connection = urllib.request.urlopen(url)
|
||||
self.check_case_sensitivity()
|
||||
|
||||
def check_case_sensitivity (self):
|
||||
def check_case_sensitivity(self):
|
||||
"""
|
||||
Check if url and windows path name match cases
|
||||
else there might be problems when copying such
|
||||
|
|
@ -197,7 +197,7 @@ class FileUrl (urlbase.UrlBase):
|
|||
{"path": path, "realpath": realpath},
|
||||
tag=WARN_FILE_SYSTEM_PATH)
|
||||
|
||||
def read_content (self):
|
||||
def read_content(self):
|
||||
"""Return file content, or in case of directories a dummy HTML file
|
||||
with links to the files."""
|
||||
if self.is_directory():
|
||||
|
|
@ -208,7 +208,7 @@ class FileUrl (urlbase.UrlBase):
|
|||
data = super(FileUrl, self).read_content()
|
||||
return data
|
||||
|
||||
def get_os_filename (self):
|
||||
def get_os_filename(self):
|
||||
"""
|
||||
Construct os specific file path out of the file:// URL.
|
||||
|
||||
|
|
@ -217,11 +217,11 @@ class FileUrl (urlbase.UrlBase):
|
|||
"""
|
||||
return get_os_filename(self.urlparts[2])
|
||||
|
||||
def get_temp_filename (self):
|
||||
def get_temp_filename(self):
|
||||
"""Get filename for content to parse."""
|
||||
return self.get_os_filename()
|
||||
|
||||
def is_directory (self):
|
||||
def is_directory(self):
|
||||
"""
|
||||
Check if file is a directory.
|
||||
|
||||
|
|
@ -231,7 +231,7 @@ class FileUrl (urlbase.UrlBase):
|
|||
filename = self.get_os_filename()
|
||||
return os.path.isdir(filename) and not os.path.islink(filename)
|
||||
|
||||
def is_parseable (self):
|
||||
def is_parseable(self):
|
||||
"""Check if content is parseable for recursion.
|
||||
|
||||
@return: True if content is parseable
|
||||
|
|
@ -246,7 +246,7 @@ class FileUrl (urlbase.UrlBase):
|
|||
log.debug(LOG_CHECK, "File with content type %r is not parseable.", self.content_type)
|
||||
return False
|
||||
|
||||
def set_content_type (self):
|
||||
def set_content_type(self):
|
||||
"""Return URL content type, or an empty string if content
|
||||
type could not be found."""
|
||||
if self.url:
|
||||
|
|
@ -254,7 +254,7 @@ class FileUrl (urlbase.UrlBase):
|
|||
else:
|
||||
self.content_type = ""
|
||||
|
||||
def get_intern_pattern (self, url=None):
|
||||
def get_intern_pattern(self, url=None):
|
||||
"""Get pattern for intern URL matching.
|
||||
|
||||
@return non-empty regex pattern or None
|
||||
|
|
@ -271,7 +271,7 @@ class FileUrl (urlbase.UrlBase):
|
|||
url = url[:i+1]
|
||||
return re.escape(url)
|
||||
|
||||
def add_url (self, url, line=0, column=0, page=0, name="", base=None):
|
||||
def add_url(self, url, line=0, column=0, page=0, name="", base=None):
|
||||
"""If a local webroot directory is configured, replace absolute URLs
|
||||
with it. After that queue the URL data for checking."""
|
||||
webroot = self.aggregate.config["localwebroot"]
|
||||
|
|
|
|||
|
|
@ -25,12 +25,12 @@ from . import proxysupport, httpurl, internpaturl, get_index_html
|
|||
from .const import WARN_FTP_MISSING_SLASH
|
||||
|
||||
|
||||
class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
||||
class FtpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
||||
"""
|
||||
Url link with ftp scheme.
|
||||
"""
|
||||
|
||||
def reset (self):
|
||||
def reset(self):
|
||||
"""
|
||||
Initialize FTP url data.
|
||||
"""
|
||||
|
|
@ -41,7 +41,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
self.filename = None
|
||||
self.filename_encoding = 'iso-8859-1'
|
||||
|
||||
def check_connection (self):
|
||||
def check_connection(self):
|
||||
"""
|
||||
In case of proxy, delegate to HttpUrl. Else check in this
|
||||
order: login, changing directory, list the file.
|
||||
|
|
@ -67,7 +67,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
self.files = []
|
||||
return None
|
||||
|
||||
def login (self):
|
||||
def login(self):
|
||||
"""Log into ftp server and check the welcome message."""
|
||||
self.url_connection = ftplib.FTP(timeout=self.aggregate.config["timeout"])
|
||||
if log.is_debug(LOG_CHECK):
|
||||
|
|
@ -93,7 +93,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
raise LinkCheckerError(
|
||||
_("Remote host has closed connection: %(msg)s") % str(msg))
|
||||
|
||||
def negotiate_encoding (self):
|
||||
def negotiate_encoding(self):
|
||||
"""Check if server can handle UTF-8 encoded filenames.
|
||||
See also RFC 2640."""
|
||||
try:
|
||||
|
|
@ -106,7 +106,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
if " UTF-8" in features.splitlines():
|
||||
self.filename_encoding = "utf-8"
|
||||
|
||||
def cwd (self):
|
||||
def cwd(self):
|
||||
"""
|
||||
Change to URL parent directory. Return filename of last path
|
||||
component.
|
||||
|
|
@ -122,7 +122,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
self.url_connection.cwd(d)
|
||||
return filename
|
||||
|
||||
def listfile (self):
|
||||
def listfile(self):
|
||||
"""
|
||||
See if filename is in the current FTP directory.
|
||||
"""
|
||||
|
|
@ -143,11 +143,11 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
return
|
||||
raise ftplib.error_perm("550 File not found")
|
||||
|
||||
def get_files (self):
|
||||
def get_files(self):
|
||||
"""Get list of filenames in directory. Subdirectories have an
|
||||
ending slash."""
|
||||
files = []
|
||||
def add_entry (line):
|
||||
def add_entry(line):
|
||||
"""Parse list line and add the entry it points to to the file
|
||||
list."""
|
||||
log.debug(LOG_CHECK, "Directory entry %r", line)
|
||||
|
|
@ -162,7 +162,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
self.url_connection.dir(add_entry)
|
||||
return files
|
||||
|
||||
def is_parseable (self):
|
||||
def is_parseable(self):
|
||||
"""See if URL target is parseable for recursion."""
|
||||
if self.is_directory():
|
||||
return True
|
||||
|
|
@ -171,18 +171,18 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
log.debug(LOG_CHECK, "URL with content type %r is not parseable.", self.content_type)
|
||||
return False
|
||||
|
||||
def is_directory (self):
|
||||
def is_directory(self):
|
||||
"""See if URL target is a directory."""
|
||||
# either the path is empty, or ends with a slash
|
||||
path = self.urlparts[2]
|
||||
return (not path) or path.endswith('/')
|
||||
|
||||
def set_content_type (self):
|
||||
def set_content_type(self):
|
||||
"""Set URL content type, or an empty string if content
|
||||
type could not be found."""
|
||||
self.content_type = mimeutil.guess_mimetype(self.url, read=self.get_content)
|
||||
|
||||
def read_content (self):
|
||||
def read_content(self):
|
||||
"""Return URL target content, or in case of directories a dummy HTML
|
||||
file with links to the files."""
|
||||
if self.is_directory():
|
||||
|
|
@ -194,7 +194,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
# download file in BINARY mode
|
||||
ftpcmd = "RETR %s" % self.filename
|
||||
buf = StringIO()
|
||||
def stor_data (s):
|
||||
def stor_data(s):
|
||||
"""Helper method storing given data"""
|
||||
# limit the download size
|
||||
if (buf.tell() + len(s)) > self.max_size:
|
||||
|
|
@ -205,7 +205,7 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
buf.close()
|
||||
return data
|
||||
|
||||
def close_connection (self):
|
||||
def close_connection(self):
|
||||
"""Release the open connection from the connection pool."""
|
||||
if self.url_connection is not None:
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -45,12 +45,13 @@ unicode_safe = strformat.unicode_safe
|
|||
# match for robots meta element content attribute
|
||||
nofollow_re = re.compile(r"\bnofollow\b", re.IGNORECASE)
|
||||
|
||||
class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
||||
|
||||
class HttpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
||||
"""
|
||||
Url link with http scheme.
|
||||
"""
|
||||
|
||||
def reset (self):
|
||||
def reset(self):
|
||||
"""
|
||||
Initialize HTTP specific variables.
|
||||
"""
|
||||
|
|
@ -62,7 +63,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
self.ssl_cipher = None
|
||||
self.ssl_cert = None
|
||||
|
||||
def allows_robots (self, url):
|
||||
def allows_robots(self, url):
|
||||
"""
|
||||
Fetch and parse the robots.txt of given url. Checks if LinkChecker
|
||||
can get the requested resource content.
|
||||
|
|
@ -74,7 +75,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
"""
|
||||
return not self.aggregate.config['robotstxt'] or self.aggregate.robots_txt.allows_url(self)
|
||||
|
||||
def content_allows_robots (self):
|
||||
def content_allows_robots(self):
|
||||
"""
|
||||
Return False if the content of this URL forbids robots to
|
||||
search for recursive links.
|
||||
|
|
@ -85,7 +86,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
soup = self.get_soup()
|
||||
return not soup.find("meta", attrs={"name": "robots", "content": nofollow_re})
|
||||
|
||||
def add_size_info (self):
|
||||
def add_size_info(self):
|
||||
"""Get size of URL content from HTTP header."""
|
||||
if self.headers and "Content-Length" in self.headers and \
|
||||
"Transfer-Encoding" not in self.headers:
|
||||
|
|
@ -98,7 +99,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
else:
|
||||
self.size = -1
|
||||
|
||||
def check_connection (self):
|
||||
def check_connection(self):
|
||||
"""
|
||||
Check a URL with HTTP protocol.
|
||||
Here is an excerpt from RFC 1945 with common response codes:
|
||||
|
|
@ -204,7 +205,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
else:
|
||||
self.ssl_cert = None
|
||||
|
||||
def construct_auth (self):
|
||||
def construct_auth(self):
|
||||
"""Construct HTTP Basic authentication credentials if there
|
||||
is user/password information available. Does not overwrite if
|
||||
credentials have already been constructed."""
|
||||
|
|
@ -214,7 +215,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
if _user is not None and _password is not None:
|
||||
self.auth = (_user, _password)
|
||||
|
||||
def set_content_type (self):
|
||||
def set_content_type(self):
|
||||
"""Return content MIME type or empty string."""
|
||||
self.content_type = httputil.get_content_type(self.headers)
|
||||
|
||||
|
|
@ -269,7 +270,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
# run connection plugins for old connection
|
||||
self.aggregate.plugin_manager.run_connection_plugins(self)
|
||||
|
||||
def getheader (self, name, default=None):
|
||||
def getheader(self, name, default=None):
|
||||
"""Get decoded header value.
|
||||
|
||||
@return: decoded header value or default of not found
|
||||
|
|
@ -280,7 +281,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
return default
|
||||
return unicode_safe(value, encoding=HEADER_ENCODING)
|
||||
|
||||
def check_response (self):
|
||||
def check_response(self):
|
||||
"""Check final result and log it."""
|
||||
if self.url_connection.status_code >= 400:
|
||||
self.set_result("%d %s" % (self.url_connection.status_code, self.url_connection.reason),
|
||||
|
|
@ -332,7 +333,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
name = "Content-Location: header"
|
||||
self.add_url(url, name=name)
|
||||
|
||||
def is_parseable (self):
|
||||
def is_parseable(self):
|
||||
"""
|
||||
Check if content is parseable for recursion.
|
||||
|
||||
|
|
@ -352,7 +353,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
return False
|
||||
return True
|
||||
|
||||
def get_robots_txt_url (self):
|
||||
def get_robots_txt_url(self):
|
||||
"""
|
||||
Get the according robots.txt URL for this URL.
|
||||
|
||||
|
|
|
|||
|
|
@ -19,9 +19,9 @@ Handle ignored URLs.
|
|||
|
||||
from . import unknownurl
|
||||
|
||||
class IgnoreUrl (unknownurl.UnknownUrl):
|
||||
class IgnoreUrl(unknownurl.UnknownUrl):
|
||||
"""Always ignored URL."""
|
||||
|
||||
def is_ignored (self):
|
||||
def is_ignored(self):
|
||||
"""Return True if this URL scheme is ignored."""
|
||||
return True
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ from . import urlbase, absolute_url
|
|||
from .. import strformat, url as urlutil
|
||||
|
||||
|
||||
def get_intern_pattern (url):
|
||||
def get_intern_pattern(url):
|
||||
"""Return intern pattern for given URL. Redirections to the same
|
||||
domain with or without "www." prepended are allowed."""
|
||||
parts = strformat.url_unicode_split(url)
|
||||
|
|
@ -45,10 +45,10 @@ def get_intern_pattern (url):
|
|||
return "^%s://%s%s" % tuple(args)
|
||||
|
||||
|
||||
class InternPatternUrl (urlbase.UrlBase):
|
||||
class InternPatternUrl(urlbase.UrlBase):
|
||||
"""Class supporting an intern URL pattern."""
|
||||
|
||||
def get_intern_pattern (self, url=None):
|
||||
def get_intern_pattern(self, url=None):
|
||||
"""
|
||||
Get pattern for intern URL matching.
|
||||
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ from ..network import iputil
|
|||
from .const import WARN_MAIL_NO_MX_HOST
|
||||
|
||||
|
||||
def getaddresses (addr):
|
||||
def getaddresses(addr):
|
||||
"""Return list of email addresses from given field value."""
|
||||
parsed = [mail for name, mail in AddressList(addr).addresslist if mail]
|
||||
if parsed:
|
||||
|
|
@ -41,19 +41,19 @@ def getaddresses (addr):
|
|||
return addresses
|
||||
|
||||
|
||||
def is_quoted (addr):
|
||||
def is_quoted(addr):
|
||||
"""Return True iff mail address string is quoted."""
|
||||
return addr.startswith('"') and addr.endswith('"')
|
||||
|
||||
|
||||
def is_literal (domain):
|
||||
def is_literal(domain):
|
||||
"""Return True iff domain string is a literal."""
|
||||
return domain.startswith('[') and domain.endswith(']')
|
||||
|
||||
|
||||
_remove_quoted = re.compile(r'\\.').sub
|
||||
_quotes = re.compile(r'["\\]')
|
||||
def is_missing_quote (addr):
|
||||
def is_missing_quote(addr):
|
||||
"""Return True iff mail address is not correctly quoted."""
|
||||
return _quotes.match(_remove_quoted("", addr[1:-1]))
|
||||
|
||||
|
|
@ -62,12 +62,12 @@ def is_missing_quote (addr):
|
|||
EMAIL_CGI_ADDRESS = ("to", "cc", "bcc")
|
||||
EMAIL_CGI_SUBJECT = "subject"
|
||||
|
||||
class MailtoUrl (urlbase.UrlBase):
|
||||
class MailtoUrl(urlbase.UrlBase):
|
||||
"""
|
||||
Url link with mailto scheme.
|
||||
"""
|
||||
|
||||
def build_url (self):
|
||||
def build_url(self):
|
||||
"""Call super.build_url(), extract list of mail addresses from URL,
|
||||
and check their syntax.
|
||||
"""
|
||||
|
|
@ -84,7 +84,7 @@ class MailtoUrl (urlbase.UrlBase):
|
|||
self.add_warning(_("No mail addresses or email subject found in `%(url)s'.") % \
|
||||
{"url": self.url})
|
||||
|
||||
def parse_addresses (self):
|
||||
def parse_addresses(self):
|
||||
"""Parse all mail addresses out of the URL target. Also parses
|
||||
optional CGI headers like "?to=foo@example.org".
|
||||
Stores parsed addresses in the self.addresses set.
|
||||
|
|
@ -127,7 +127,7 @@ class MailtoUrl (urlbase.UrlBase):
|
|||
self.addresses.update(getaddresses(url))
|
||||
log.debug(LOG_CHECK, "addresses: %s", self.addresses)
|
||||
|
||||
def check_email_syntax (self, mail):
|
||||
def check_email_syntax(self, mail):
|
||||
"""Check email syntax. The relevant RFCs:
|
||||
- How to check names (memo):
|
||||
http://tools.ietf.org/html/rfc3696
|
||||
|
|
@ -220,7 +220,7 @@ class MailtoUrl (urlbase.UrlBase):
|
|||
{"addr": mail}, valid=False, overwrite=False)
|
||||
return
|
||||
|
||||
def check_connection (self):
|
||||
def check_connection(self):
|
||||
"""
|
||||
Verify a list of email addresses. If one address fails,
|
||||
the whole list will fail.
|
||||
|
|
@ -235,7 +235,7 @@ class MailtoUrl (urlbase.UrlBase):
|
|||
if not self.valid:
|
||||
break
|
||||
|
||||
def check_smtp_domain (self, mail):
|
||||
def check_smtp_domain(self, mail):
|
||||
"""
|
||||
Check a single mail address.
|
||||
"""
|
||||
|
|
@ -292,7 +292,7 @@ class MailtoUrl (urlbase.UrlBase):
|
|||
emails = ",".join(sorted(self.addresses))
|
||||
self.cache_url = "%s:%s" % (self.scheme, emails)
|
||||
|
||||
def can_get_content (self):
|
||||
def can_get_content(self):
|
||||
"""
|
||||
mailto: URLs do not have any content
|
||||
|
||||
|
|
|
|||
|
|
@ -28,12 +28,12 @@ from .const import WARN_NNTP_NO_SERVER, WARN_NNTP_NO_NEWSGROUP
|
|||
|
||||
random.seed()
|
||||
|
||||
class NntpUrl (urlbase.UrlBase):
|
||||
class NntpUrl(urlbase.UrlBase):
|
||||
"""
|
||||
Url link with NNTP scheme.
|
||||
"""
|
||||
|
||||
def check_connection (self):
|
||||
def check_connection(self):
|
||||
"""
|
||||
Connect to NNTP server and try to request the URL article
|
||||
resource (if specified).
|
||||
|
|
@ -64,7 +64,7 @@ class NntpUrl (urlbase.UrlBase):
|
|||
self.add_warning(_("No newsgroup specified in NNTP URL."),
|
||||
tag=WARN_NNTP_NO_NEWSGROUP)
|
||||
|
||||
def _connect_nntp (self, nntpserver):
|
||||
def _connect_nntp(self, nntpserver):
|
||||
"""
|
||||
This is done only once per checking task. Also, the newly
|
||||
introduced error codes 504 and 505 (both inclining "Too busy, retry
|
||||
|
|
@ -91,11 +91,11 @@ class NntpUrl (urlbase.UrlBase):
|
|||
self.add_info(nntp.getwelcome())
|
||||
return nntp
|
||||
|
||||
def wait (self):
|
||||
def wait(self):
|
||||
"""Wait some time before trying to connect again."""
|
||||
time.sleep(random.randrange(10, 30))
|
||||
|
||||
def can_get_content (self):
|
||||
def can_get_content(self):
|
||||
"""
|
||||
NNTP urls have no content.
|
||||
|
||||
|
|
|
|||
|
|
@ -25,7 +25,7 @@ from .. import LinkCheckerError, log, LOG_CHECK, url as urlutil, httputil
|
|||
class ProxySupport:
|
||||
"""Get support for proxying and for URLs with user:pass@host setting."""
|
||||
|
||||
def set_proxy (self, proxy):
|
||||
def set_proxy(self, proxy):
|
||||
"""Parse given proxy information and store parsed values.
|
||||
Note that only http:// proxies are supported, both for ftp://
|
||||
and http:// URLs.
|
||||
|
|
@ -60,7 +60,7 @@ class ProxySupport:
|
|||
auth = "%s:%s" % (username, password)
|
||||
self.proxyauth = "Basic "+httputil.encode_base64(auth)
|
||||
|
||||
def ignore_proxy_host (self):
|
||||
def ignore_proxy_host(self):
|
||||
"""Check if self.host is in the $no_proxy ignore list."""
|
||||
if urllib.request.proxy_bypass(self.host):
|
||||
return True
|
||||
|
|
|
|||
|
|
@ -27,12 +27,12 @@ def encode(s, encoding="iso-8859-1", errors="ignore"):
|
|||
return s.encode(encoding, errors)
|
||||
|
||||
|
||||
class TelnetUrl (urlbase.UrlBase):
|
||||
class TelnetUrl(urlbase.UrlBase):
|
||||
"""
|
||||
Url link with telnet scheme.
|
||||
"""
|
||||
|
||||
def build_url (self):
|
||||
def build_url(self):
|
||||
"""
|
||||
Call super.build_url(), set default telnet port and initialize
|
||||
the login credentials.
|
||||
|
|
@ -44,7 +44,7 @@ class TelnetUrl (urlbase.UrlBase):
|
|||
# set user/pass
|
||||
self.user, self.password = self.get_user_password()
|
||||
|
||||
def local_check (self):
|
||||
def local_check(self):
|
||||
"""
|
||||
Warn about empty host names. Else call super.local_check().
|
||||
"""
|
||||
|
|
@ -53,7 +53,7 @@ class TelnetUrl (urlbase.UrlBase):
|
|||
return
|
||||
super(TelnetUrl, self).local_check()
|
||||
|
||||
def check_connection (self):
|
||||
def check_connection(self):
|
||||
"""
|
||||
Open a telnet connection and try to login. Expected login
|
||||
label is "login: ", expected password label is "Password: ".
|
||||
|
|
@ -71,7 +71,7 @@ class TelnetUrl (urlbase.UrlBase):
|
|||
# XXX how to tell if we are logged in??
|
||||
self.url_connection.write(b"exit\n")
|
||||
|
||||
def can_get_content (self):
|
||||
def can_get_content(self):
|
||||
"""
|
||||
Telnet URLs have no content.
|
||||
|
||||
|
|
|
|||
|
|
@ -21,10 +21,10 @@ import re
|
|||
from . import urlbase
|
||||
|
||||
|
||||
class UnknownUrl (urlbase.UrlBase):
|
||||
class UnknownUrl(urlbase.UrlBase):
|
||||
"""Handle unknown or just plain broken URLs."""
|
||||
|
||||
def build_url (self):
|
||||
def build_url(self):
|
||||
"""Only logs that this URL is unknown."""
|
||||
super(UnknownUrl, self).build_url()
|
||||
if self.is_ignored():
|
||||
|
|
@ -35,11 +35,11 @@ class UnknownUrl (urlbase.UrlBase):
|
|||
self.set_result(_("URL is unrecognized or has invalid syntax"),
|
||||
valid=False)
|
||||
|
||||
def is_ignored (self):
|
||||
def is_ignored(self):
|
||||
"""Return True if this URL scheme is ignored."""
|
||||
return is_unknown_scheme(self.scheme)
|
||||
|
||||
def can_get_content (self):
|
||||
def can_get_content(self):
|
||||
"""Unknown URLs have no content.
|
||||
|
||||
@return: False
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ unicode_safe = strformat.unicode_safe
|
|||
# schemes that are invalid with an empty hostname
|
||||
scheme_requires_host = ("ftp", "http", "telnet")
|
||||
|
||||
def urljoin (parent, url):
|
||||
def urljoin(parent, url):
|
||||
"""
|
||||
If url is relative, join parent and url. Else leave url as-is.
|
||||
|
||||
|
|
@ -56,7 +56,7 @@ def urljoin (parent, url):
|
|||
return urllib.parse.urljoin(parent, url)
|
||||
|
||||
|
||||
def url_norm (url, encoding):
|
||||
def url_norm(url, encoding):
|
||||
"""Wrapper for url.url_norm() to convert UnicodeError in
|
||||
LinkCheckerError."""
|
||||
try:
|
||||
|
|
@ -95,7 +95,7 @@ class UrlBase:
|
|||
# Read in 16kb chunks
|
||||
ReadChunkBytes = 1024*16
|
||||
|
||||
def __init__ (self, base_url, recursion_level, aggregate,
|
||||
def __init__(self, base_url, recursion_level, aggregate,
|
||||
parent_url=None, base_ref=None, line=-1, column=-1, page=-1,
|
||||
name="", url_encoding=None, extern=None):
|
||||
"""
|
||||
|
|
@ -126,7 +126,7 @@ class UrlBase:
|
|||
if not self.has_result:
|
||||
self.set_result(_("filtered"))
|
||||
|
||||
def init (self, base_ref, base_url, parent_url, recursion_level,
|
||||
def init(self, base_ref, base_url, parent_url, recursion_level,
|
||||
aggregate, line, column, page, name, url_encoding, extern):
|
||||
"""
|
||||
Initialize internal data.
|
||||
|
|
@ -162,7 +162,7 @@ class UrlBase:
|
|||
self.add_warning(_("Leading or trailing whitespace in URL `%(url)s'.") %
|
||||
{"url": base_url}, tag=WARN_URL_WHITESPACE)
|
||||
|
||||
def reset (self):
|
||||
def reset(self):
|
||||
"""
|
||||
Reset all variables to default values.
|
||||
"""
|
||||
|
|
@ -215,7 +215,7 @@ class UrlBase:
|
|||
# URLs seen through redirections
|
||||
self.aliases = []
|
||||
|
||||
def set_result (self, msg, valid=True, overwrite=False):
|
||||
def set_result(self, msg, valid=True, overwrite=False):
|
||||
"""
|
||||
Set result string and validity.
|
||||
"""
|
||||
|
|
@ -233,7 +233,7 @@ class UrlBase:
|
|||
# free content data
|
||||
self.data = None
|
||||
|
||||
def get_title (self):
|
||||
def get_title(self):
|
||||
"""Return title of page the URL refers to.
|
||||
This is per default the filename or the URL."""
|
||||
if self.title is None:
|
||||
|
|
@ -249,17 +249,17 @@ class UrlBase:
|
|||
self.title = title
|
||||
return self.title
|
||||
|
||||
def is_parseable (self):
|
||||
def is_parseable(self):
|
||||
"""
|
||||
Return True iff content of this url is parseable.
|
||||
"""
|
||||
return False
|
||||
|
||||
def is_html (self):
|
||||
def is_html(self):
|
||||
"""Return True iff content of this url is HTML formatted."""
|
||||
return self._is_ctype("html")
|
||||
|
||||
def is_css (self):
|
||||
def is_css(self):
|
||||
"""Return True iff content of this url is CSS stylesheet."""
|
||||
return self._is_ctype("css")
|
||||
|
||||
|
|
@ -270,11 +270,11 @@ class UrlBase:
|
|||
mime = self.content_type
|
||||
return self.ContentMimetypes.get(mime) == ctype
|
||||
|
||||
def is_http (self):
|
||||
def is_http(self):
|
||||
"""Return True for http:// or https:// URLs."""
|
||||
return self.scheme in ("http", "https")
|
||||
|
||||
def is_file (self):
|
||||
def is_file(self):
|
||||
"""Return True for file:// URLs."""
|
||||
return self.scheme == "file"
|
||||
|
||||
|
|
@ -286,7 +286,7 @@ class UrlBase:
|
|||
"""Return True for local (ie. file://) URLs."""
|
||||
return self.is_file()
|
||||
|
||||
def add_warning (self, s, tag=None):
|
||||
def add_warning(self, s, tag=None):
|
||||
"""
|
||||
Add a warning string.
|
||||
"""
|
||||
|
|
@ -295,14 +295,14 @@ class UrlBase:
|
|||
tag not in self.aggregate.config["ignorewarnings"]:
|
||||
self.warnings.append(item)
|
||||
|
||||
def add_info (self, s):
|
||||
def add_info(self, s):
|
||||
"""
|
||||
Add an info string.
|
||||
"""
|
||||
if s not in self.info:
|
||||
self.info.append(s)
|
||||
|
||||
def set_cache_url (self):
|
||||
def set_cache_url(self):
|
||||
"""Set the URL to be used for caching."""
|
||||
# remove anchor from cached target url since we assume
|
||||
# URLs with different anchors to have the same content
|
||||
|
|
@ -310,7 +310,7 @@ class UrlBase:
|
|||
if self.cache_url is not None:
|
||||
assert isinstance(self.cache_url, str_text), repr(self.cache_url)
|
||||
|
||||
def check_syntax (self):
|
||||
def check_syntax(self):
|
||||
"""
|
||||
Called before self.check(), this function inspects the
|
||||
url syntax. Success enables further checking, failure
|
||||
|
|
@ -343,7 +343,7 @@ class UrlBase:
|
|||
args = dict(len=len(self.url), max=URL_MAX_LENGTH)
|
||||
self.add_warning(_("URL length %(len)d is longer than %(max)d.") % args, tag=WARN_URL_TOO_LONG)
|
||||
|
||||
def build_url (self):
|
||||
def build_url(self):
|
||||
"""
|
||||
Construct self.url and self.urlparts out of the given base
|
||||
url information self.base_url, self.parent_url and self.base_ref.
|
||||
|
|
@ -378,7 +378,7 @@ class UrlBase:
|
|||
# and unsplit again
|
||||
self.url = urlutil.urlunsplit(self.urlparts)
|
||||
|
||||
def build_url_parts (self):
|
||||
def build_url_parts(self):
|
||||
"""Set userinfo, host, port and anchor from self.urlparts.
|
||||
Also checks for obfuscated IP addresses.
|
||||
"""
|
||||
|
|
@ -409,7 +409,7 @@ class UrlBase:
|
|||
if self.anchor is not None:
|
||||
assert isinstance(self.anchor, str_text), repr(self.anchor)
|
||||
|
||||
def check_obfuscated_ip (self):
|
||||
def check_obfuscated_ip(self):
|
||||
"""Warn if host of this URL is obfuscated IP address."""
|
||||
# check if self.host can be an IP address
|
||||
# check for obfuscated IP address
|
||||
|
|
@ -422,7 +422,7 @@ class UrlBase:
|
|||
{"url": self.base_url, "ip": ips[0]},
|
||||
tag=WARN_URL_OBFUSCATED_IP)
|
||||
|
||||
def check (self):
|
||||
def check(self):
|
||||
"""Main check function for checking this URL."""
|
||||
if self.aggregate.config["trace"]:
|
||||
trace.trace_on()
|
||||
|
|
@ -437,7 +437,7 @@ class UrlBase:
|
|||
else:
|
||||
raise
|
||||
|
||||
def local_check (self):
|
||||
def local_check(self):
|
||||
"""Local check function can be overridden in subclasses."""
|
||||
log.debug(LOG_CHECK, "Checking %s", str_text(self))
|
||||
# strict extern URLs should not be checked
|
||||
|
|
@ -476,7 +476,7 @@ class UrlBase:
|
|||
{"msg": str_text(value)}, tag=WARN_URL_ERROR_GETTING_CONTENT)
|
||||
return False
|
||||
|
||||
def close_connection (self):
|
||||
def close_connection(self):
|
||||
"""
|
||||
Close an opened url connection.
|
||||
"""
|
||||
|
|
@ -490,7 +490,7 @@ class UrlBase:
|
|||
pass
|
||||
self.url_connection = None
|
||||
|
||||
def handle_exception (self):
|
||||
def handle_exception(self):
|
||||
"""
|
||||
An exception occurred. Log it and set the cache flag.
|
||||
"""
|
||||
|
|
@ -510,14 +510,14 @@ class UrlBase:
|
|||
# limit length to 240
|
||||
return strformat.limit(errmsg, length=240)
|
||||
|
||||
def check_connection (self):
|
||||
def check_connection(self):
|
||||
"""
|
||||
The basic connection check uses urlopen to initialize
|
||||
a connection object.
|
||||
"""
|
||||
self.url_connection = urlopen(self.url)
|
||||
|
||||
def add_size_info (self):
|
||||
def add_size_info(self):
|
||||
"""Set size of URL content (if any)..
|
||||
Should be overridden in subclasses."""
|
||||
maxbytes = self.aggregate.config["maxfilesizedownload"]
|
||||
|
|
@ -539,7 +539,7 @@ class UrlBase:
|
|||
return False
|
||||
return True
|
||||
|
||||
def allows_recursion (self):
|
||||
def allows_recursion(self):
|
||||
"""
|
||||
Return True iff we can recurse into the url's content.
|
||||
"""
|
||||
|
|
@ -568,7 +568,7 @@ class UrlBase:
|
|||
"""Returns True: only check robots.txt on HTTP links."""
|
||||
return True
|
||||
|
||||
def set_extern (self, url):
|
||||
def set_extern(self, url):
|
||||
"""
|
||||
Match URL against extern and intern link patterns. If no pattern
|
||||
matches the URL is extern. Sets self.extern to a tuple (bool,
|
||||
|
|
@ -600,12 +600,12 @@ class UrlBase:
|
|||
else:
|
||||
self.extern = (1, 1)
|
||||
|
||||
def set_content_type (self):
|
||||
def set_content_type(self):
|
||||
"""Set content MIME type.
|
||||
Should be overridden in subclasses."""
|
||||
pass
|
||||
|
||||
def can_get_content (self):
|
||||
def can_get_content(self):
|
||||
"""Indicate wether url get_content() can be called."""
|
||||
return self.size <= self.aggregate.config["maxfilesizedownload"]
|
||||
|
||||
|
|
@ -632,7 +632,7 @@ class UrlBase:
|
|||
self.data = self.download_content()
|
||||
return self.data
|
||||
|
||||
def get_content (self):
|
||||
def get_content(self):
|
||||
if self.text is None:
|
||||
self.get_raw_content()
|
||||
self.soup = htmlsoup.make_soup(self.data)
|
||||
|
|
@ -657,7 +657,7 @@ class UrlBase:
|
|||
"""
|
||||
return self.url_connection.read(self.ReadChunkBytes)
|
||||
|
||||
def get_user_password (self):
|
||||
def get_user_password(self):
|
||||
"""Get tuple (user, password) from configured authentication.
|
||||
Both user and password can be None.
|
||||
"""
|
||||
|
|
@ -666,7 +666,7 @@ class UrlBase:
|
|||
return urllib.parse.splitpasswd(self.userinfo)
|
||||
return self.aggregate.config.get_user_password(self.url)
|
||||
|
||||
def add_url (self, url, line=0, column=0, page=0, name="", base=None):
|
||||
def add_url(self, url, line=0, column=0, page=0, name="", base=None):
|
||||
"""Add new URL to queue."""
|
||||
if base:
|
||||
base_ref = urlutil.url_norm(base, encoding=self.encoding)[0]
|
||||
|
|
@ -677,7 +677,7 @@ class UrlBase:
|
|||
page=page, name=name, parent_content_type=self.content_type, url_encoding=self.encoding)
|
||||
self.aggregate.urlqueue.put(url_data)
|
||||
|
||||
def serialized (self, sep=os.linesep):
|
||||
def serialized(self, sep=os.linesep):
|
||||
"""
|
||||
Return serialized url check data as unicode string.
|
||||
"""
|
||||
|
|
@ -696,7 +696,7 @@ class UrlBase:
|
|||
"cache_url=%s" % self.cache_url,
|
||||
])
|
||||
|
||||
def get_intern_pattern (self, url=None):
|
||||
def get_intern_pattern(self, url=None):
|
||||
"""Get pattern for intern URL matching.
|
||||
|
||||
@param url: the URL to set intern pattern for, else self.url
|
||||
|
|
@ -737,7 +737,7 @@ class UrlBase:
|
|||
s = str_text(self)
|
||||
return self.aggregate.config['logger'].encode(s)
|
||||
|
||||
def __repr__ (self):
|
||||
def __repr__(self):
|
||||
"""
|
||||
Get URL info.
|
||||
|
||||
|
|
@ -746,7 +746,7 @@ class UrlBase:
|
|||
"""
|
||||
return "<%s>" % self.serialized(sep=", ")
|
||||
|
||||
def to_wire_dict (self):
|
||||
def to_wire_dict(self):
|
||||
"""Return a simplified transport object for logging and caching.
|
||||
|
||||
The transport object must contain these attributes:
|
||||
|
|
@ -813,7 +813,7 @@ class UrlBase:
|
|||
modified=self.modified,
|
||||
)
|
||||
|
||||
def to_wire (self):
|
||||
def to_wire(self):
|
||||
"""Return compact UrlData object with information from to_wire_dict().
|
||||
"""
|
||||
return CompactUrlData(self.to_wire_dict())
|
||||
|
|
|
|||
|
|
@ -53,7 +53,7 @@ def print_plugins(folders, exit_code=0):
|
|||
sys.exit(exit_code)
|
||||
|
||||
|
||||
def print_usage (msg, exit_code=2):
|
||||
def print_usage(msg, exit_code=2):
|
||||
"""Print a program msg text to stderr and exit."""
|
||||
program = sys.argv[0]
|
||||
print(_("Error: %(msg)s") % {"msg": msg}, file=console.stderr)
|
||||
|
|
@ -61,7 +61,7 @@ def print_usage (msg, exit_code=2):
|
|||
sys.exit(exit_code)
|
||||
|
||||
|
||||
def aggregate_url (aggregate, url, err_exit_code=2):
|
||||
def aggregate_url(aggregate, url, err_exit_code=2):
|
||||
"""Append given commandline URL to input queue."""
|
||||
get_url_from = checker.get_url_from
|
||||
url = checker.guess_url(url)
|
||||
|
|
|
|||
|
|
@ -49,7 +49,7 @@ under certain conditions. Look at the file `LICENSE' within this
|
|||
distribution."""
|
||||
Portable = configdata.portable
|
||||
|
||||
def normpath (path):
|
||||
def normpath(path):
|
||||
"""Norm given system path with all available norm or expand functions
|
||||
in os.path."""
|
||||
expanded = os.path.expanduser(os.path.expandvars(path))
|
||||
|
|
@ -87,12 +87,12 @@ def get_modules_info():
|
|||
return "Modules: %s" % (", ".join(module_infos))
|
||||
|
||||
|
||||
def get_share_dir ():
|
||||
def get_share_dir():
|
||||
"""Return absolute path of LinkChecker example configuration."""
|
||||
return os.path.join(get_install_data(), "share", "linkchecker")
|
||||
|
||||
|
||||
def get_share_file (filename, devel_dir=None):
|
||||
def get_share_file(filename, devel_dir=None):
|
||||
"""Return a filename in the share directory.
|
||||
@param devel_dir: directory to search when developing
|
||||
@ptype devel_dir: string
|
||||
|
|
@ -144,13 +144,13 @@ def get_certifi_file():
|
|||
|
||||
|
||||
# dynamic options
|
||||
class Configuration (dict):
|
||||
class Configuration(dict):
|
||||
"""
|
||||
Storage for configuration options. Options can both be given from
|
||||
the command line as well as from configuration files.
|
||||
"""
|
||||
|
||||
def __init__ (self):
|
||||
def __init__(self):
|
||||
"""
|
||||
Initialize the default options.
|
||||
"""
|
||||
|
|
@ -210,18 +210,18 @@ class Configuration (dict):
|
|||
"""Set the status logger."""
|
||||
self.status_logger = status_logger
|
||||
|
||||
def logger_new (self, loggername, **kwargs):
|
||||
def logger_new(self, loggername, **kwargs):
|
||||
"""Instantiate new logger and return it."""
|
||||
args = self[loggername]
|
||||
args.update(kwargs)
|
||||
return self.loggers[loggername](**args)
|
||||
|
||||
def logger_add (self, loggerclass):
|
||||
def logger_add(self, loggerclass):
|
||||
"""Add a new logger type to the known loggers."""
|
||||
self.loggers[loggerclass.LoggerName] = loggerclass
|
||||
self[loggerclass.LoggerName] = {}
|
||||
|
||||
def read (self, files=None):
|
||||
def read(self, files=None):
|
||||
"""
|
||||
Read settings from given config files.
|
||||
|
||||
|
|
@ -247,7 +247,7 @@ class Configuration (dict):
|
|||
log.debug(LOG_CHECK, "reading configuration from %s", filtered_cfiles)
|
||||
confparse.LCConfigParser(self).read(filtered_cfiles)
|
||||
|
||||
def add_auth (self, user=None, password=None, pattern=None):
|
||||
def add_auth(self, user=None, password=None, pattern=None):
|
||||
"""Add given authentication data."""
|
||||
if not user or not pattern:
|
||||
log.warn(LOG_CHECK,
|
||||
|
|
@ -260,7 +260,7 @@ class Configuration (dict):
|
|||
)
|
||||
self["authentication"].append(entry)
|
||||
|
||||
def get_user_password (self, url):
|
||||
def get_user_password(self, url):
|
||||
"""Get tuple (user, password) from configured authentication
|
||||
that matches the given URL.
|
||||
Both user and password can be None if not specified, or no
|
||||
|
|
@ -275,7 +275,7 @@ class Configuration (dict):
|
|||
"""Get dict with limit per connection type."""
|
||||
return {key: self['maxconnections%s' % key] for key in ('http', 'https', 'ftp')}
|
||||
|
||||
def sanitize (self):
|
||||
def sanitize(self):
|
||||
"Make sure the configuration is consistent."
|
||||
if self['logger'] is None:
|
||||
self.sanitize_logger()
|
||||
|
|
@ -287,14 +287,14 @@ class Configuration (dict):
|
|||
# set default socket timeout
|
||||
socket.setdefaulttimeout(self['timeout'])
|
||||
|
||||
def sanitize_logger (self):
|
||||
def sanitize_logger(self):
|
||||
"""Make logger configuration consistent."""
|
||||
if not self['output']:
|
||||
log.warn(LOG_CHECK, _("activating text logger output."))
|
||||
self['output'] = 'text'
|
||||
self['logger'] = self.logger_new(self['output'])
|
||||
|
||||
def sanitize_loginurl (self):
|
||||
def sanitize_loginurl(self):
|
||||
"""Make login configuration consistent."""
|
||||
url = self["loginurl"]
|
||||
disable = False
|
||||
|
|
@ -322,7 +322,7 @@ class Configuration (dict):
|
|||
_("disabling login URL %(url)s.") % {"url": url})
|
||||
self["loginurl"] = None
|
||||
|
||||
def sanitize_proxies (self):
|
||||
def sanitize_proxies(self):
|
||||
"""Try to read additional proxy settings which urllib does not
|
||||
support."""
|
||||
if os.name != 'posix':
|
||||
|
|
@ -428,7 +428,7 @@ def get_user_config():
|
|||
return userconf
|
||||
|
||||
|
||||
def get_gconf_http_proxy ():
|
||||
def get_gconf_http_proxy():
|
||||
"""Return host:port for GConf HTTP proxy if found, else None."""
|
||||
try:
|
||||
import gconf
|
||||
|
|
@ -449,7 +449,7 @@ def get_gconf_http_proxy ():
|
|||
return None
|
||||
|
||||
|
||||
def get_gconf_ftp_proxy ():
|
||||
def get_gconf_ftp_proxy():
|
||||
"""Return host:port for GConf FTP proxy if found, else None."""
|
||||
try:
|
||||
import gconf
|
||||
|
|
@ -469,7 +469,7 @@ def get_gconf_ftp_proxy ():
|
|||
return None
|
||||
|
||||
|
||||
def get_kde_http_proxy ():
|
||||
def get_kde_http_proxy():
|
||||
"""Return host:port for KDE HTTP proxy if found, else None."""
|
||||
config_dir = get_kde_config_dir()
|
||||
if not config_dir:
|
||||
|
|
@ -483,7 +483,7 @@ def get_kde_http_proxy ():
|
|||
pass
|
||||
|
||||
|
||||
def get_kde_ftp_proxy ():
|
||||
def get_kde_ftp_proxy():
|
||||
"""Return host:port for KDE HTTP proxy if found, else None."""
|
||||
config_dir = get_kde_config_dir()
|
||||
if not config_dir:
|
||||
|
|
@ -527,7 +527,7 @@ def get_kde_ftp_proxy ():
|
|||
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
def get_kde_config_dir ():
|
||||
def get_kde_config_dir():
|
||||
"""Return KDE configuration directory or None if not found."""
|
||||
kde_home = get_kde_home_dir()
|
||||
if not kde_home:
|
||||
|
|
@ -536,12 +536,12 @@ def get_kde_config_dir ():
|
|||
return kde_home_to_config(kde_home)
|
||||
|
||||
|
||||
def kde_home_to_config (kde_home):
|
||||
def kde_home_to_config(kde_home):
|
||||
"""Add subdirectories for config path to KDE home directory."""
|
||||
return os.path.join(kde_home, "share", "config")
|
||||
|
||||
|
||||
def get_kde_home_dir ():
|
||||
def get_kde_home_dir():
|
||||
"""Return KDE home directory or None if not found."""
|
||||
if os.environ.get("KDEHOME"):
|
||||
kde_home = os.path.abspath(os.environ["KDEHOME"])
|
||||
|
|
@ -572,7 +572,7 @@ def get_kde_home_dir ():
|
|||
loc_ro = re.compile(r"\[.*\]$")
|
||||
|
||||
@lru_cache(1)
|
||||
def read_kioslaverc (kde_config_dir):
|
||||
def read_kioslaverc(kde_config_dir):
|
||||
"""Read kioslaverc into data dictionary."""
|
||||
data = {}
|
||||
filename = os.path.join(kde_config_dir, "kioslaverc")
|
||||
|
|
@ -600,14 +600,14 @@ def read_kioslaverc (kde_config_dir):
|
|||
return data
|
||||
|
||||
|
||||
def add_kde_proxy (key, value, data):
|
||||
def add_kde_proxy(key, value, data):
|
||||
"""Add a proxy value to data dictionary after sanity checks."""
|
||||
if not value or value[:3] == "//:":
|
||||
return
|
||||
data[key] = value
|
||||
|
||||
|
||||
def add_kde_setting (key, value, data):
|
||||
def add_kde_setting(key, value, data):
|
||||
"""Add a KDE proxy setting value to data dictionary."""
|
||||
if key == "ProxyType":
|
||||
mode = None
|
||||
|
|
@ -641,12 +641,12 @@ def add_kde_setting (key, value, data):
|
|||
# XXX todo
|
||||
|
||||
|
||||
def split_hosts (value):
|
||||
def split_hosts(value):
|
||||
"""Split comma-separated host list."""
|
||||
return [host for host in value.split(", ") if host]
|
||||
|
||||
|
||||
def resolve_indirect (data, key, splithosts=False):
|
||||
def resolve_indirect(data, key, splithosts=False):
|
||||
"""Replace name of environment variable with its value."""
|
||||
value = data[key]
|
||||
env_value = os.environ.get(value)
|
||||
|
|
@ -659,7 +659,7 @@ def resolve_indirect (data, key, splithosts=False):
|
|||
del data[key]
|
||||
|
||||
|
||||
def resolve_kde_settings (data):
|
||||
def resolve_kde_settings(data):
|
||||
"""Write final proxy configuration values in data dictionary."""
|
||||
if "mode" not in data:
|
||||
return
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ import os
|
|||
from .. import LinkCheckerError, get_link_pat, LOG_CHECK, log, fileutil, plugins, logconf
|
||||
|
||||
|
||||
def read_multiline (value):
|
||||
def read_multiline(value):
|
||||
"""Helper function reading multiline values."""
|
||||
for line in value.splitlines():
|
||||
line = line.strip()
|
||||
|
|
@ -30,17 +30,17 @@ def read_multiline (value):
|
|||
yield line
|
||||
|
||||
|
||||
class LCConfigParser (RawConfigParser):
|
||||
class LCConfigParser(RawConfigParser):
|
||||
"""
|
||||
Parse a LinkChecker configuration file.
|
||||
"""
|
||||
|
||||
def __init__ (self, config):
|
||||
def __init__(self, config):
|
||||
"""Initialize configuration."""
|
||||
super(LCConfigParser, self).__init__()
|
||||
self.config = config
|
||||
|
||||
def read (self, files):
|
||||
def read(self, files):
|
||||
"""Read settings from given config files.
|
||||
|
||||
@raises: LinkCheckerError on syntax errors in the config file(s)
|
||||
|
|
@ -61,7 +61,7 @@ class LCConfigParser (RawConfigParser):
|
|||
raise LinkCheckerError(
|
||||
_("Error parsing configuration: %s") % str(msg))
|
||||
|
||||
def read_string_option (self, section, option, allowempty=False):
|
||||
def read_string_option(self, section, option, allowempty=False):
|
||||
"""Read a string option."""
|
||||
if self.has_option(section, option):
|
||||
value = self.get(section, option)
|
||||
|
|
@ -74,7 +74,7 @@ class LCConfigParser (RawConfigParser):
|
|||
if self.has_option(section, option):
|
||||
self.config[option] = self.getboolean(section, option)
|
||||
|
||||
def read_int_option (self, section, option, key=None, min=None, max=None):
|
||||
def read_int_option(self, section, option, key=None, min=None, max=None):
|
||||
"""Read an integer option."""
|
||||
if self.has_option(section, option):
|
||||
num = self.getint(section, option)
|
||||
|
|
@ -88,7 +88,7 @@ class LCConfigParser (RawConfigParser):
|
|||
key = option
|
||||
self.config[key] = num
|
||||
|
||||
def read_output_config (self):
|
||||
def read_output_config(self):
|
||||
"""Read configuration options in section "output"."""
|
||||
section = "output"
|
||||
from ..logger import LoggerClasses
|
||||
|
|
@ -130,7 +130,7 @@ class LCConfigParser (RawConfigParser):
|
|||
output = self.config.logger_new(val, fileoutput=1)
|
||||
self.config['fileoutput'].append(output)
|
||||
|
||||
def read_checking_config (self):
|
||||
def read_checking_config(self):
|
||||
"""Read configuration options in section "checking"."""
|
||||
section = "checking"
|
||||
self.read_int_option(section, "threads", min=-1)
|
||||
|
|
@ -157,7 +157,7 @@ class LCConfigParser (RawConfigParser):
|
|||
self.read_string_option(section, "sslverify")
|
||||
self.read_int_option(section, "maxrunseconds", min=0)
|
||||
|
||||
def read_authentication_config (self):
|
||||
def read_authentication_config(self):
|
||||
"""Read configuration options in section "authentication"."""
|
||||
section = "authentication"
|
||||
password_fields = []
|
||||
|
|
@ -207,7 +207,7 @@ class LCConfigParser (RawConfigParser):
|
|||
elif os.name == 'nt':
|
||||
log.warn(LOG_CHECK, _("See http://support.microsoft.com/kb/308419 for more info on setting file permissions."))
|
||||
|
||||
def read_filtering_config (self):
|
||||
def read_filtering_config(self):
|
||||
"""
|
||||
Read configuration options in section "filtering".
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -17,17 +17,17 @@
|
|||
Special container classes.
|
||||
"""
|
||||
|
||||
class LFUCache (dict):
|
||||
class LFUCache(dict):
|
||||
"""Limited cache which purges least frequently used items."""
|
||||
|
||||
def __init__ (self, size=1000):
|
||||
def __init__(self, size=1000):
|
||||
"""Initialize internal LFU cache."""
|
||||
super(LFUCache, self).__init__()
|
||||
if size < 1:
|
||||
raise ValueError("invalid cache size %d" % size)
|
||||
self.size = size
|
||||
|
||||
def __setitem__ (self, key, val):
|
||||
def __setitem__(self, key, val):
|
||||
"""Store given key/value."""
|
||||
if key in self:
|
||||
# store value, do not increase number of uses
|
||||
|
|
@ -38,7 +38,7 @@ class LFUCache (dict):
|
|||
if len(self) > self.size:
|
||||
self.shrink()
|
||||
|
||||
def shrink (self):
|
||||
def shrink(self):
|
||||
"""Shrink ca. 5% of entries."""
|
||||
trim = int(0.05*len(self))
|
||||
if trim:
|
||||
|
|
@ -49,24 +49,24 @@ class LFUCache (dict):
|
|||
for item in values[0:trim]:
|
||||
del self[item[0]]
|
||||
|
||||
def __getitem__ (self, key):
|
||||
def __getitem__(self, key):
|
||||
"""Update key usage and return value."""
|
||||
entry = super(LFUCache, self).__getitem__(key)
|
||||
entry[0] += 1
|
||||
return entry[1]
|
||||
|
||||
def uses (self, key):
|
||||
def uses(self, key):
|
||||
"""Get number of uses for given key (without increasing the number of
|
||||
uses)"""
|
||||
return super(LFUCache, self).__getitem__(key)[0]
|
||||
|
||||
def get (self, key, def_val=None):
|
||||
def get(self, key, def_val=None):
|
||||
"""Update key usage if found and return value, else return default."""
|
||||
if key in self:
|
||||
return self[key]
|
||||
return def_val
|
||||
|
||||
def setdefault (self, key, def_val=None):
|
||||
def setdefault(self, key, def_val=None):
|
||||
"""Update key usage if found and return value, else set and return
|
||||
default."""
|
||||
if key in self:
|
||||
|
|
@ -74,30 +74,30 @@ class LFUCache (dict):
|
|||
self[key] = def_val
|
||||
return def_val
|
||||
|
||||
def items (self):
|
||||
def items(self):
|
||||
"""Return list of items, not updating usage count."""
|
||||
return [(key, value[1]) for key, value in super(LFUCache, self).items()]
|
||||
|
||||
def iteritems (self):
|
||||
def iteritems(self):
|
||||
"""Return iterator of items, not updating usage count."""
|
||||
for key, value in super(LFUCache, self).items():
|
||||
yield (key, value[1])
|
||||
|
||||
def values (self):
|
||||
def values(self):
|
||||
"""Return list of values, not updating usage count."""
|
||||
return [value[1] for value in super(LFUCache, self).values()]
|
||||
|
||||
def itervalues (self):
|
||||
def itervalues(self):
|
||||
"""Return iterator of values, not updating usage count."""
|
||||
for value in super(LFUCache, self).values():
|
||||
yield value[1]
|
||||
|
||||
def popitem (self):
|
||||
def popitem(self):
|
||||
"""Remove and return an item."""
|
||||
key, value = super(LFUCache, self).popitem()
|
||||
return (key, value[1])
|
||||
|
||||
def pop (self):
|
||||
def pop(self):
|
||||
"""Remove and return a value."""
|
||||
value = super(LFUCache, self).pop()
|
||||
return value[1]
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ import email
|
|||
import requests
|
||||
|
||||
|
||||
def from_file (filename):
|
||||
def from_file(filename):
|
||||
"""Parse cookie data from a text file in HTTP header format.
|
||||
|
||||
@return: list of tuples (headers, scheme, host, path)
|
||||
|
|
@ -43,7 +43,7 @@ def from_file (filename):
|
|||
return entries
|
||||
|
||||
|
||||
def from_headers (strheader):
|
||||
def from_headers(strheader):
|
||||
"""Parse cookie data from a string in HTTP header (RFC 2616) format.
|
||||
|
||||
@return: list of cookies
|
||||
|
|
|
|||
|
|
@ -19,17 +19,17 @@ Simple decorators (usable in Python >= 2.4).
|
|||
Example:
|
||||
|
||||
@synchronized(thread.allocate_lock())
|
||||
def f ():
|
||||
def f():
|
||||
"Synchronized function"
|
||||
print("i am synchronized:", f, f.__doc__)
|
||||
|
||||
@deprecated
|
||||
def g ():
|
||||
def g():
|
||||
"this function is deprecated"
|
||||
pass
|
||||
|
||||
@notimplemented
|
||||
def h ():
|
||||
def h():
|
||||
"todo"
|
||||
pass
|
||||
|
||||
|
|
@ -41,7 +41,7 @@ import sys
|
|||
import time
|
||||
|
||||
|
||||
def update_func_meta (fake_func, real_func):
|
||||
def update_func_meta(fake_func, real_func):
|
||||
"""Set meta information (eg. __doc__) of fake function to that
|
||||
of the real function.
|
||||
@return fake_func
|
||||
|
|
@ -53,10 +53,10 @@ def update_func_meta (fake_func, real_func):
|
|||
return fake_func
|
||||
|
||||
|
||||
def deprecated (func):
|
||||
def deprecated(func):
|
||||
"""A decorator which can be used to mark functions as deprecated.
|
||||
It emits a warning when the function is called."""
|
||||
def newfunc (*args, **kwargs):
|
||||
def newfunc(*args, **kwargs):
|
||||
"""Print deprecated warning and execute original function."""
|
||||
warnings.warn("Call to deprecated function %s." % func.__name__,
|
||||
category=DeprecationWarning)
|
||||
|
|
@ -64,7 +64,7 @@ def deprecated (func):
|
|||
return update_func_meta(newfunc, func)
|
||||
|
||||
|
||||
def signal_handler (signal_number):
|
||||
def signal_handler(signal_number):
|
||||
"""From http://aspn.activestate.com/ASPN/Cookbook/Python/Recipe/410666
|
||||
|
||||
A decorator to set the specified function as handler for a signal.
|
||||
|
|
@ -74,7 +74,7 @@ def signal_handler (signal_number):
|
|||
no handler is set.
|
||||
"""
|
||||
# create the 'real' decorator which takes only a function as an argument
|
||||
def newfunc (function):
|
||||
def newfunc(function):
|
||||
"""Register function as signal handler."""
|
||||
# note: actually the kill(2) function uses the signal number of 0
|
||||
# for a special case, but for signal(2) only positive integers
|
||||
|
|
@ -86,9 +86,9 @@ def signal_handler (signal_number):
|
|||
return newfunc
|
||||
|
||||
|
||||
def synchronize (lock, func, log_duration_secs=0):
|
||||
def synchronize(lock, func, log_duration_secs=0):
|
||||
"""Return synchronized function acquiring the given lock."""
|
||||
def newfunc (*args, **kwargs):
|
||||
def newfunc(*args, **kwargs):
|
||||
"""Execute function synchronized."""
|
||||
t = time.time()
|
||||
with lock:
|
||||
|
|
@ -99,14 +99,14 @@ def synchronize (lock, func, log_duration_secs=0):
|
|||
return update_func_meta(newfunc, func)
|
||||
|
||||
|
||||
def synchronized (lock):
|
||||
def synchronized(lock):
|
||||
"""A decorator calling a function with aqcuired lock."""
|
||||
return lambda func: synchronize(lock, func)
|
||||
|
||||
|
||||
def notimplemented (func):
|
||||
def notimplemented(func):
|
||||
"""Raises a NotImplementedError if the function is called."""
|
||||
def newfunc (*args, **kwargs):
|
||||
def newfunc(*args, **kwargs):
|
||||
"""Raise NotImplementedError"""
|
||||
co = func.func_code
|
||||
attrs = (co.co_name, co.co_filename, co.co_firstlineno)
|
||||
|
|
@ -114,10 +114,10 @@ def notimplemented (func):
|
|||
return update_func_meta(newfunc, func)
|
||||
|
||||
|
||||
def timeit (func, log, limit):
|
||||
def timeit(func, log, limit):
|
||||
"""Print execution time of the function. For quick'n'dirty profiling."""
|
||||
|
||||
def newfunc (*args, **kwargs):
|
||||
def newfunc(*args, **kwargs):
|
||||
"""Execute function and print execution time."""
|
||||
t = time.time()
|
||||
res = func(*args, **kwargs)
|
||||
|
|
@ -130,7 +130,7 @@ def timeit (func, log, limit):
|
|||
return update_func_meta(newfunc, func)
|
||||
|
||||
|
||||
def timed (log=sys.stderr, limit=2.0):
|
||||
def timed(log=sys.stderr, limit=2.0):
|
||||
"""Decorator to run a function with timing info."""
|
||||
return lambda func: timeit(func, log, limit)
|
||||
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ from ..cache import urlqueue, robots_txt, results
|
|||
from . import aggregator, console
|
||||
|
||||
|
||||
def check_urls (aggregate):
|
||||
def check_urls(aggregate):
|
||||
"""Main check function; checks all configured URLs until interrupted
|
||||
with Ctrl-C.
|
||||
@return: None
|
||||
|
|
@ -66,7 +66,7 @@ def check_urls (aggregate):
|
|||
# and both should be handled by the calling layer.
|
||||
|
||||
|
||||
def check_url (aggregate):
|
||||
def check_url(aggregate):
|
||||
"""Helper function waiting for URL queue."""
|
||||
while True:
|
||||
try:
|
||||
|
|
@ -79,7 +79,7 @@ def check_url (aggregate):
|
|||
break
|
||||
|
||||
|
||||
def interrupt (aggregate):
|
||||
def interrupt(aggregate):
|
||||
"""Interrupt execution and shutdown, ignoring any subsequent
|
||||
interrupts."""
|
||||
while True:
|
||||
|
|
@ -94,7 +94,7 @@ def interrupt (aggregate):
|
|||
pass
|
||||
|
||||
|
||||
def abort (aggregate):
|
||||
def abort(aggregate):
|
||||
"""Helper function to ensure a clean shutdown."""
|
||||
while True:
|
||||
try:
|
||||
|
|
@ -108,7 +108,7 @@ def abort (aggregate):
|
|||
abort_now()
|
||||
|
||||
|
||||
def abort_now ():
|
||||
def abort_now():
|
||||
"""Force exit of current process without cleanup."""
|
||||
if os.name == 'posix':
|
||||
# Unix systems can use signals
|
||||
|
|
@ -124,7 +124,7 @@ def abort_now ():
|
|||
os._exit(3)
|
||||
|
||||
|
||||
def get_aggregate (config):
|
||||
def get_aggregate(config):
|
||||
"""Get an aggregator instance with given configuration."""
|
||||
_urlqueue = urlqueue.UrlQueue(max_allowed_urls=config["maxnumurls"])
|
||||
_robots_txt = robots_txt.RobotsTxt(config["useragent"])
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ def new_request_session(config, cookies):
|
|||
class Aggregate:
|
||||
"""Store thread-safe data collections for checker threads."""
|
||||
|
||||
def __init__ (self, config, urlqueue, robots_txt, plugin_manager,
|
||||
def __init__(self, config, urlqueue, robots_txt, plugin_manager,
|
||||
result_cache):
|
||||
"""Store given link checking objects."""
|
||||
self.config = config
|
||||
|
|
@ -105,7 +105,7 @@ class Aggregate:
|
|||
raise LinkCheckerError("No cookies set by login URL %s" % url)
|
||||
|
||||
@synchronized(_threads_lock)
|
||||
def start_threads (self):
|
||||
def start_threads(self):
|
||||
"""Spawn threads for URL checking and status printing."""
|
||||
if self.config["status"]:
|
||||
t = status.Status(self, self.config["status_wait_seconds"])
|
||||
|
|
@ -150,7 +150,7 @@ class Aggregate:
|
|||
self.times[host] = t + wait_time
|
||||
|
||||
@synchronized(_threads_lock)
|
||||
def print_active_threads (self):
|
||||
def print_active_threads(self):
|
||||
"""Log all currently active threads."""
|
||||
debug = log.is_debug(LOG_CHECK)
|
||||
if debug:
|
||||
|
|
@ -174,11 +174,11 @@ class Aggregate:
|
|||
if name.startswith("CheckThread-"):
|
||||
yield name
|
||||
|
||||
def cancel (self):
|
||||
def cancel(self):
|
||||
"""Empty the URL queue."""
|
||||
self.urlqueue.do_shutdown()
|
||||
|
||||
def abort (self):
|
||||
def abort(self):
|
||||
"""Print still-active URLs and empty the URL queue."""
|
||||
self.print_active_threads()
|
||||
self.cancel()
|
||||
|
|
@ -190,12 +190,12 @@ class Aggregate:
|
|||
raise KeyboardInterrupt()
|
||||
|
||||
@synchronized(_threads_lock)
|
||||
def remove_stopped_threads (self):
|
||||
def remove_stopped_threads(self):
|
||||
"""Remove the stopped threads from the internal thread list."""
|
||||
self.threads = [t for t in self.threads if t.is_alive()]
|
||||
|
||||
@synchronized(_threads_lock)
|
||||
def finish (self):
|
||||
def finish(self):
|
||||
"""Wait for checker threads to finish."""
|
||||
if not self.urlqueue.empty():
|
||||
# This happens when all checker threads died.
|
||||
|
|
@ -206,7 +206,7 @@ class Aggregate:
|
|||
t.join(timeout=1.0)
|
||||
|
||||
@synchronized(_threads_lock)
|
||||
def is_finished (self):
|
||||
def is_finished(self):
|
||||
"""Determine if checking is finished."""
|
||||
self.remove_stopped_threads()
|
||||
return self.urlqueue.empty() and not self.threads
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ from .. import parser
|
|||
QUEUE_POLL_INTERVALL_SECS = 1.0
|
||||
|
||||
|
||||
def check_urls (urlqueue, logger):
|
||||
def check_urls(urlqueue, logger):
|
||||
"""Check URLs without threading."""
|
||||
while not urlqueue.empty():
|
||||
url_data = urlqueue.get()
|
||||
|
|
@ -80,21 +80,21 @@ def check_url(url_data, logger):
|
|||
class Checker(task.LoggedCheckedTask):
|
||||
"""URL check thread."""
|
||||
|
||||
def __init__ (self, urlqueue, logger, add_request_session):
|
||||
def __init__(self, urlqueue, logger, add_request_session):
|
||||
"""Store URL queue and logger."""
|
||||
super(Checker, self).__init__(logger)
|
||||
self.urlqueue = urlqueue
|
||||
self.origname = self.getName()
|
||||
self.add_request_session = add_request_session
|
||||
|
||||
def run_checked (self):
|
||||
def run_checked(self):
|
||||
"""Check URLs in the queue."""
|
||||
# construct per-thread HTTP/S requests session
|
||||
self.add_request_session()
|
||||
while not self.stopped(0):
|
||||
self.check_url()
|
||||
|
||||
def check_url (self):
|
||||
def check_url(self):
|
||||
"""Try to get URL data from queue and check it."""
|
||||
try:
|
||||
url_data = self.urlqueue.get(timeout=QUEUE_POLL_INTERVALL_SECS)
|
||||
|
|
@ -109,7 +109,7 @@ class Checker(task.LoggedCheckedTask):
|
|||
except Exception:
|
||||
self.internal_error()
|
||||
|
||||
def check_url_data (self, url_data):
|
||||
def check_url_data(self, url_data):
|
||||
"""Check one URL data instance."""
|
||||
if url_data.url is None:
|
||||
url = ""
|
||||
|
|
|
|||
|
|
@ -29,11 +29,11 @@ stdout = i18n.get_encoded_writer()
|
|||
class StatusLogger:
|
||||
"""Standard status logger. Default output is stderr."""
|
||||
|
||||
def __init__ (self, fd=stderr):
|
||||
def __init__(self, fd=stderr):
|
||||
"""Save file descriptor for logging."""
|
||||
self.fd = fd
|
||||
|
||||
def log_status (self, checked, in_progress, queue, duration, num_urls):
|
||||
def log_status(self, checked, in_progress, queue, duration, num_urls):
|
||||
"""Write status message to file descriptor."""
|
||||
msg = _n("%2d thread active", "%2d threads active", in_progress) % \
|
||||
in_progress
|
||||
|
|
@ -48,20 +48,20 @@ class StatusLogger:
|
|||
self.writeln(msg)
|
||||
self.flush()
|
||||
|
||||
def write (self, msg):
|
||||
def write(self, msg):
|
||||
"""Write message to file descriptor."""
|
||||
self.fd.write(msg)
|
||||
|
||||
def writeln (self, msg):
|
||||
def writeln(self, msg):
|
||||
"""Write status message and line break to file descriptor."""
|
||||
self.fd.write("%s%s" % (msg, os.linesep))
|
||||
|
||||
def flush (self):
|
||||
def flush(self):
|
||||
"""Flush file descriptor."""
|
||||
self.fd.flush()
|
||||
|
||||
|
||||
def internal_error (out=stderr, etype=None, evalue=None, tb=None):
|
||||
def internal_error(out=stderr, etype=None, evalue=None, tb=None):
|
||||
"""Print internal error message (output defaults to stderr)."""
|
||||
print(os.linesep, file=out)
|
||||
print(_("""********** Oops, I did it again. *************
|
||||
|
|
@ -94,20 +94,20 @@ I can work with ;) .
|
|||
_("******** LinkChecker internal error, over and out ********"), file=out)
|
||||
|
||||
|
||||
def print_env_info (key, out=stderr):
|
||||
def print_env_info(key, out=stderr):
|
||||
"""If given environment key is defined, print it out."""
|
||||
value = os.getenv(key)
|
||||
if value is not None:
|
||||
print(key, "=", repr(value), file=out)
|
||||
|
||||
|
||||
def print_proxy_info (out=stderr):
|
||||
def print_proxy_info(out=stderr):
|
||||
"""Print proxy info."""
|
||||
for key in ("http_proxy", "ftp_proxy", "no_proxy"):
|
||||
print_env_info(key, out=out)
|
||||
|
||||
|
||||
def print_locale_info (out=stderr):
|
||||
def print_locale_info(out=stderr):
|
||||
"""Print locale info."""
|
||||
for key in ("LANGUAGE", "LC_ALL", "LC_CTYPE", "LANG"):
|
||||
print_env_info(key, out=out)
|
||||
|
|
@ -131,7 +131,7 @@ PYTHON_ENV_VARS = (
|
|||
'PYTHONWARNINGS',
|
||||
'PYTHONHASHSEED',
|
||||
)
|
||||
def print_app_info (out=stderr):
|
||||
def print_app_info(out=stderr):
|
||||
"""Print system and application info (output defaults to stderr)."""
|
||||
print(_("System info:"), file=out)
|
||||
print(configuration.App, file=out)
|
||||
|
|
@ -146,7 +146,7 @@ def print_app_info (out=stderr):
|
|||
print(_("sys.argv:"), sys.argv, file=out)
|
||||
|
||||
|
||||
def print_version (out=stdout):
|
||||
def print_version(out=stdout):
|
||||
"""Print the program version (output defaults to stdout)."""
|
||||
print(configuration.App, _("released"),
|
||||
configuration.ReleaseDate, file=out)
|
||||
|
|
|
|||
|
|
@ -19,14 +19,14 @@ from . import task
|
|||
from .. import log, LOG_CHECK, strformat
|
||||
|
||||
|
||||
class Interrupt (task.CheckedTask):
|
||||
class Interrupt(task.CheckedTask):
|
||||
"""Thread that raises KeyboardInterrupt after a specified duration.
|
||||
This gives us a portable SIGALRM implementation.
|
||||
The duration is checked every 5 seconds.
|
||||
"""
|
||||
WaitSeconds = 5
|
||||
|
||||
def __init__ (self, duration):
|
||||
def __init__(self, duration):
|
||||
"""Initialize the task.
|
||||
@param duration: raise KeyboardInterrupt after given number of seconds
|
||||
@ptype duration: int
|
||||
|
|
@ -34,7 +34,7 @@ class Interrupt (task.CheckedTask):
|
|||
super(Interrupt, self).__init__()
|
||||
self.duration = duration
|
||||
|
||||
def run_checked (self):
|
||||
def run_checked(self):
|
||||
"""Wait and raise KeyboardInterrupt after."""
|
||||
self.start_time = time.time()
|
||||
self.setName("Interrupt")
|
||||
|
|
|
|||
|
|
@ -24,28 +24,28 @@ _lock = threading.Lock()
|
|||
class Logger:
|
||||
"""Thread safe multi-logger class used by aggregator instances."""
|
||||
|
||||
def __init__ (self, config):
|
||||
def __init__(self, config):
|
||||
"""Initialize basic logging variables."""
|
||||
self.loggers = [config['logger']]
|
||||
self.loggers.extend(config['fileoutput'])
|
||||
self.verbose = config["verbose"]
|
||||
self.warnings = config["warnings"]
|
||||
|
||||
def start_log_output (self):
|
||||
def start_log_output(self):
|
||||
"""
|
||||
Start output of all configured loggers.
|
||||
"""
|
||||
for logger in self.loggers:
|
||||
logger.start_output()
|
||||
|
||||
def end_log_output (self, **kwargs):
|
||||
def end_log_output(self, **kwargs):
|
||||
"""
|
||||
End output of all configured loggers.
|
||||
"""
|
||||
for logger in self.loggers:
|
||||
logger.end_output(**kwargs)
|
||||
|
||||
def do_print (self, url_data):
|
||||
def do_print(self, url_data):
|
||||
"""Determine if URL entry should be logged or not."""
|
||||
if self.verbose:
|
||||
return True
|
||||
|
|
@ -54,7 +54,7 @@ class Logger:
|
|||
return not url_data.valid
|
||||
|
||||
@synchronized(_lock)
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""Send new url to all configured loggers."""
|
||||
self.check_active_loggers()
|
||||
do_print = self.do_print(url_data)
|
||||
|
|
@ -64,7 +64,7 @@ class Logger:
|
|||
log.log_filter_url(url_data, do_print)
|
||||
|
||||
@synchronized(_lock)
|
||||
def log_internal_error (self):
|
||||
def log_internal_error(self):
|
||||
"""Document that an internal error occurred."""
|
||||
for logger in self.loggers:
|
||||
logger.log_internal_error()
|
||||
|
|
|
|||
|
|
@ -18,10 +18,10 @@ import time
|
|||
from . import task
|
||||
|
||||
|
||||
class Status (task.LoggedCheckedTask):
|
||||
class Status(task.LoggedCheckedTask):
|
||||
"""Thread that gathers and logs the status periodically."""
|
||||
|
||||
def __init__ (self, aggregator, wait_seconds):
|
||||
def __init__(self, aggregator, wait_seconds):
|
||||
"""Initialize the status logger task.
|
||||
@param urlqueue: the URL queue
|
||||
@ptype urlqueue: Urlqueue
|
||||
|
|
@ -36,7 +36,7 @@ class Status (task.LoggedCheckedTask):
|
|||
self.wait_seconds = wait_seconds
|
||||
assert self.wait_seconds >= 1
|
||||
|
||||
def run_checked (self):
|
||||
def run_checked(self):
|
||||
"""Print periodic status messages."""
|
||||
self.start_time = time.time()
|
||||
self.setName("Status")
|
||||
|
|
@ -49,7 +49,7 @@ class Status (task.LoggedCheckedTask):
|
|||
wait_seconds = self.wait_seconds
|
||||
first_wait = False
|
||||
|
||||
def log_status (self):
|
||||
def log_status(self):
|
||||
"""Log a status message."""
|
||||
duration = time.time() - self.start_time
|
||||
checked, in_progress, queue = self.aggregator.urlqueue.status()
|
||||
|
|
|
|||
|
|
@ -20,10 +20,10 @@ from .. import threader
|
|||
from . import console
|
||||
|
||||
|
||||
class CheckedTask (threader.StoppableThread):
|
||||
class CheckedTask(threader.StoppableThread):
|
||||
"""Stoppable URL check task, handling error conditions while running."""
|
||||
|
||||
def run (self):
|
||||
def run(self):
|
||||
"""Handle keyboard interrupt and other errors."""
|
||||
try:
|
||||
self.run_checked()
|
||||
|
|
@ -33,25 +33,25 @@ class CheckedTask (threader.StoppableThread):
|
|||
self.internal_error()
|
||||
|
||||
@notimplemented
|
||||
def run_checked (self):
|
||||
def run_checked(self):
|
||||
"""Overload in subclass."""
|
||||
pass
|
||||
|
||||
@notimplemented
|
||||
def internal_error (self):
|
||||
def internal_error(self):
|
||||
"""Overload in subclass."""
|
||||
pass
|
||||
|
||||
|
||||
class LoggedCheckedTask (CheckedTask):
|
||||
class LoggedCheckedTask(CheckedTask):
|
||||
"""URL check task with a logger instance and internal error handling."""
|
||||
|
||||
def __init__ (self, logger):
|
||||
def __init__(self, logger):
|
||||
"""Initialize super instance and store given logger."""
|
||||
super(CheckedTask, self).__init__()
|
||||
self.logger = logger
|
||||
|
||||
def internal_error (self):
|
||||
def internal_error(self):
|
||||
"""Log an internal error on console and the logger."""
|
||||
console.internal_error()
|
||||
self.logger.log_internal_error()
|
||||
|
|
|
|||
|
|
@ -20,59 +20,59 @@ Dummy objects.
|
|||
class Dummy:
|
||||
"""A dummy object ignores all access to it. Useful for testing."""
|
||||
|
||||
def __init__ (self, *args, **kwargs):
|
||||
def __init__(self, *args, **kwargs):
|
||||
"""Return None"""
|
||||
pass
|
||||
|
||||
def __call__ (self, *args, **kwargs):
|
||||
def __call__(self, *args, **kwargs):
|
||||
"""Return self."""
|
||||
return self
|
||||
|
||||
def __getattr__ (self, name):
|
||||
def __getattr__(self, name):
|
||||
"""Return self."""
|
||||
return self
|
||||
|
||||
def __setattr__ (self, name, value):
|
||||
def __setattr__(self, name, value):
|
||||
"""Return None"""
|
||||
pass
|
||||
|
||||
def __delattr__ (self, name):
|
||||
def __delattr__(self, name):
|
||||
"""Return None"""
|
||||
pass
|
||||
|
||||
def __str__ (self):
|
||||
def __str__(self):
|
||||
"""Return 'dummy'"""
|
||||
return "dummy"
|
||||
|
||||
def __repr__ (self):
|
||||
def __repr__(self):
|
||||
"""Return '<dummy>'"""
|
||||
return "<dummy>"
|
||||
|
||||
def __unicode__ (self):
|
||||
def __unicode__(self):
|
||||
"""Return 'dummy'"""
|
||||
return "dummy"
|
||||
|
||||
def __len__ (self):
|
||||
def __len__(self):
|
||||
"""Return zero"""
|
||||
return 0
|
||||
|
||||
def __getitem__ (self, key):
|
||||
def __getitem__(self, key):
|
||||
"""Return self"""
|
||||
return self
|
||||
|
||||
def __setitem__ (self, key, value):
|
||||
def __setitem__(self, key, value):
|
||||
"""Return None"""
|
||||
pass
|
||||
|
||||
def __delitem__ (self, key):
|
||||
def __delitem__(self, key):
|
||||
"""Return None"""
|
||||
pass
|
||||
|
||||
def __contains__ (self, key):
|
||||
def __contains__(self, key):
|
||||
"""Return False"""
|
||||
return False
|
||||
|
||||
|
||||
def dummy (*args, **kwargs):
|
||||
def dummy(*args, **kwargs):
|
||||
"""Ignore any positional or keyword arguments, return None."""
|
||||
pass
|
||||
|
|
|
|||
|
|
@ -28,7 +28,7 @@ from functools import lru_cache
|
|||
from builtins import str as str_text
|
||||
|
||||
|
||||
def has_module (name, without_error=True):
|
||||
def has_module(name, without_error=True):
|
||||
"""Test if given module can be imported.
|
||||
@param without_error: True if module must not throw any errors when importing
|
||||
@return: flag if import is successful
|
||||
|
|
@ -47,14 +47,14 @@ def has_module (name, without_error=True):
|
|||
class GlobDirectoryWalker:
|
||||
"""A forward iterator that traverses a directory tree."""
|
||||
|
||||
def __init__ (self, directory, pattern="*"):
|
||||
def __init__(self, directory, pattern="*"):
|
||||
"""Set start directory and pattern matcher."""
|
||||
self.stack = [directory]
|
||||
self.pattern = pattern
|
||||
self.files = []
|
||||
self.index = 0
|
||||
|
||||
def __getitem__ (self, index):
|
||||
def __getitem__(self, index):
|
||||
"""Search for next filename."""
|
||||
while True:
|
||||
try:
|
||||
|
|
@ -81,22 +81,22 @@ rglob = GlobDirectoryWalker
|
|||
class Buffer:
|
||||
"""Holds buffered data"""
|
||||
|
||||
def __init__ (self, empty=''):
|
||||
def __init__(self, empty=''):
|
||||
"""Initialize buffer."""
|
||||
self.empty = self.buf = empty
|
||||
self.tmpbuf = []
|
||||
self.pos = 0
|
||||
|
||||
def __len__ (self):
|
||||
def __len__(self):
|
||||
"""Buffer length."""
|
||||
return self.pos
|
||||
|
||||
def write (self, data):
|
||||
def write(self, data):
|
||||
"""Write data to buffer."""
|
||||
self.tmpbuf.append(data)
|
||||
self.pos += len(data)
|
||||
|
||||
def flush (self, overlap=0):
|
||||
def flush(self, overlap=0):
|
||||
"""Flush buffered data and return it."""
|
||||
self.buf += self.empty.join(self.tmpbuf)
|
||||
self.tmpbuf = []
|
||||
|
|
@ -109,7 +109,7 @@ class Buffer:
|
|||
return data
|
||||
|
||||
|
||||
def get_mtime (filename):
|
||||
def get_mtime(filename):
|
||||
"""Return modification time of filename or zero on errors."""
|
||||
try:
|
||||
return os.path.getmtime(filename)
|
||||
|
|
@ -117,7 +117,7 @@ def get_mtime (filename):
|
|||
return 0
|
||||
|
||||
|
||||
def get_size (filename):
|
||||
def get_size(filename):
|
||||
"""Return file size in Bytes, or -1 on error."""
|
||||
try:
|
||||
return os.path.getsize(filename)
|
||||
|
|
@ -135,7 +135,7 @@ elif "G_BROKEN_FILENAMES" in os.environ:
|
|||
else:
|
||||
FSCODING = "utf-8"
|
||||
|
||||
def path_safe (path):
|
||||
def path_safe(path):
|
||||
"""Ensure path string is compatible with the platform file system encoding."""
|
||||
if isinstance(path, str_text) and not os.path.supports_unicode_filenames:
|
||||
path = path.encode(FSCODING, "replace").decode(FSCODING)
|
||||
|
|
@ -144,7 +144,7 @@ def path_safe (path):
|
|||
|
||||
# cache for modified check {absolute filename -> mtime}
|
||||
_mtime_cache = {}
|
||||
def has_changed (filename):
|
||||
def has_changed(filename):
|
||||
"""Check if filename has changed since the last check. If this
|
||||
is the first check, assume the file is changed."""
|
||||
key = os.path.abspath(filename)
|
||||
|
|
@ -155,14 +155,14 @@ def has_changed (filename):
|
|||
return mtime > _mtime_cache[key]
|
||||
|
||||
|
||||
def get_temp_file (mode='r', **kwargs):
|
||||
def get_temp_file(mode='r', **kwargs):
|
||||
"""Return tuple (open file object, filename) pointing to a temporary
|
||||
file."""
|
||||
fd, filename = tempfile.mkstemp(**kwargs)
|
||||
return os.fdopen(fd, mode), filename
|
||||
|
||||
|
||||
def is_tty (fp):
|
||||
def is_tty(fp):
|
||||
"""Check if is a file object pointing to a TTY."""
|
||||
return (hasattr(fp, "isatty") and fp.isatty())
|
||||
|
||||
|
|
|
|||
|
|
@ -21,12 +21,12 @@ See also http://cr.yp.to/ftpparse.html
|
|||
|
||||
months = ("jan", "feb", "mar", "apr", "may", "jun", "jul", "aug", "sep",
|
||||
"oct", "nov", "dec")
|
||||
def ismonth (txt):
|
||||
def ismonth(txt):
|
||||
"""Check if given text is a month name."""
|
||||
return txt.lower() in months
|
||||
|
||||
|
||||
def ftpparse (line):
|
||||
def ftpparse(line):
|
||||
"""Parse a FTP list line into a dictionary with attributes:
|
||||
name - name of file (string)
|
||||
trycwd - False if cwd is definitely pointless, True otherwise
|
||||
|
|
|
|||
|
|
@ -91,13 +91,13 @@ swf_url_re = re.compile(b"(?i)%s" % urlutil.safe_url_pattern.encode('ascii'))
|
|||
c_comment_re = re.compile(r"/\*.*?\*/", re.DOTALL)
|
||||
|
||||
|
||||
def strip_c_comments (text):
|
||||
def strip_c_comments(text):
|
||||
"""Remove C/CSS-style comments from text. Note that this method also
|
||||
deliberately removes comments inside of strings."""
|
||||
return c_comment_re.sub('', text)
|
||||
|
||||
|
||||
def is_meta_url (attr, attrs):
|
||||
def is_meta_url(attr, attrs):
|
||||
"""Check if the meta attributes contain a URL."""
|
||||
res = False
|
||||
if attr == "content":
|
||||
|
|
@ -123,7 +123,7 @@ class LinkFinder:
|
|||
"""Find HTML links, and apply them to the callback function with the
|
||||
format (url, lineno, column, name, codebase)."""
|
||||
|
||||
def __init__ (self, callback, tags):
|
||||
def __init__(self, callback, tags):
|
||||
"""Store content in buffer and initialize URL list."""
|
||||
self.callback = callback
|
||||
# set universal tag attributes using tagname None
|
||||
|
|
@ -135,7 +135,7 @@ class LinkFinder:
|
|||
self.tags[tag].update(self.universal_attrs)
|
||||
self.base_ref = ''
|
||||
|
||||
def html_element (self, tag, attrs, element_text, lineno, column):
|
||||
def html_element(self, tag, attrs, element_text, lineno, column):
|
||||
"""Search for links and store found URLs in a list."""
|
||||
log.debug(LOG_CHECK, "LinkFinder tag %s attrs %s", tag, attrs)
|
||||
log.debug(LOG_CHECK, "line %d col %d", lineno, column)
|
||||
|
|
@ -166,7 +166,7 @@ class LinkFinder:
|
|||
self.parse_tag(tag, attr, value, name, base, lineno, column)
|
||||
log.debug(LOG_CHECK, "LinkFinder finished tag %s", tag)
|
||||
|
||||
def get_link_name (self, tag, attrs, attr, name=None):
|
||||
def get_link_name(self, tag, attrs, attr, name=None):
|
||||
"""Parse attrs for link name. Return name of link."""
|
||||
if tag == 'a' and attr == 'href':
|
||||
if not name:
|
||||
|
|
@ -179,7 +179,7 @@ class LinkFinder:
|
|||
name = ""
|
||||
return name
|
||||
|
||||
def parse_tag (self, tag, attr, value, name, base, lineno, column):
|
||||
def parse_tag(self, tag, attr, value, name, base, lineno, column):
|
||||
"""Add given url data to url list."""
|
||||
assert isinstance(tag, str_text), repr(tag)
|
||||
assert isinstance(attr, str_text), repr(attr)
|
||||
|
|
|
|||
|
|
@ -17,7 +17,7 @@ import base64
|
|||
from datetime import datetime
|
||||
|
||||
|
||||
def encode_base64 (s):
|
||||
def encode_base64(s):
|
||||
"""Encode given string in base64, excluding trailing newlines."""
|
||||
return base64.b64encode(s)
|
||||
|
||||
|
|
@ -68,7 +68,7 @@ def asn1_generaltime_to_seconds(timestr):
|
|||
pass
|
||||
return res
|
||||
|
||||
def has_header_value (headers, name, value):
|
||||
def has_header_value(headers, name, value):
|
||||
"""
|
||||
Look in headers for a specific header name and value.
|
||||
Both name and value are case insensitive.
|
||||
|
|
@ -84,7 +84,7 @@ def has_header_value (headers, name, value):
|
|||
return False
|
||||
|
||||
|
||||
def get_content_type (headers):
|
||||
def get_content_type(headers):
|
||||
"""
|
||||
Get the MIME type from the Content-Type header value, or
|
||||
'application/octet-stream' if not found.
|
||||
|
|
|
|||
|
|
@ -30,32 +30,32 @@ default_language = default_encoding = None
|
|||
default_directory = None
|
||||
default_domain = None
|
||||
|
||||
def install_builtin (translator, do_unicode):
|
||||
def install_builtin(translator, do_unicode):
|
||||
"""Install _() and _n() gettext methods into default namespace."""
|
||||
import builtins
|
||||
builtins.__dict__['_'] = translator.gettext
|
||||
# also install ngettext
|
||||
builtins.__dict__['_n'] = translator.ngettext
|
||||
|
||||
class Translator (gettext.GNUTranslations):
|
||||
class Translator(gettext.GNUTranslations):
|
||||
"""A translation class always installing its gettext methods into the
|
||||
default namespace."""
|
||||
|
||||
def install (self, do_unicode):
|
||||
def install(self, do_unicode):
|
||||
"""Install gettext methods into the default namespace."""
|
||||
install_builtin(self, do_unicode)
|
||||
|
||||
|
||||
class NullTranslator (gettext.NullTranslations):
|
||||
class NullTranslator(gettext.NullTranslations):
|
||||
"""A dummy translation class always installing its gettext methods into
|
||||
the default namespace."""
|
||||
|
||||
def install (self, do_unicode):
|
||||
def install(self, do_unicode):
|
||||
"""Install gettext methods into the default namespace."""
|
||||
install_builtin(self, do_unicode)
|
||||
|
||||
|
||||
def init (domain, directory, loc=None):
|
||||
def init(domain, directory, loc=None):
|
||||
"""Initialize this gettext i18n module. Searches for supported languages
|
||||
and installs the gettext translator class."""
|
||||
global default_language, default_encoding, default_domain, default_directory
|
||||
|
|
@ -90,7 +90,7 @@ def install_language(language):
|
|||
translator.install(do_unicode)
|
||||
|
||||
|
||||
def get_translator (domain, directory, languages=None,
|
||||
def get_translator(domain, directory, languages=None,
|
||||
translatorklass=Translator, fallback=False,
|
||||
fallbackklass=NullTranslator):
|
||||
"""Search the appropriate GNUTranslations class."""
|
||||
|
|
@ -101,14 +101,14 @@ def get_translator (domain, directory, languages=None,
|
|||
return translator
|
||||
|
||||
|
||||
def get_lang (lang):
|
||||
def get_lang(lang):
|
||||
"""Return lang if it is supported, or the default language."""
|
||||
if lang in supported_languages:
|
||||
return lang
|
||||
return default_language
|
||||
|
||||
|
||||
def get_headers_lang (headers):
|
||||
def get_headers_lang(headers):
|
||||
"""Return preferred supported language in given HTTP headers."""
|
||||
if 'Accept-Language' not in headers:
|
||||
return default_language
|
||||
|
|
@ -132,7 +132,7 @@ def get_headers_lang (headers):
|
|||
return default_language
|
||||
|
||||
|
||||
def get_locale ():
|
||||
def get_locale():
|
||||
"""Search the default platform locale and norm it.
|
||||
@returns (locale, encoding)
|
||||
@rtype (string, string)"""
|
||||
|
|
@ -150,7 +150,7 @@ def get_locale ():
|
|||
return (loc, encoding)
|
||||
|
||||
|
||||
def norm_locale (loc):
|
||||
def norm_locale(loc):
|
||||
"""Normalize a locale."""
|
||||
loc = locale.normalize(loc)
|
||||
# split up the locale into its base components
|
||||
|
|
@ -175,17 +175,17 @@ lang_transis = {
|
|||
'en': {'de': 'Englisch'},
|
||||
}
|
||||
|
||||
def lang_name (lang):
|
||||
def lang_name(lang):
|
||||
"""Return full name of given language."""
|
||||
return lang_names[lang]
|
||||
|
||||
|
||||
def lang_trans (lang, curlang):
|
||||
def lang_trans(lang, curlang):
|
||||
"""Return translated full name of given language."""
|
||||
return lang_transis[lang][curlang]
|
||||
|
||||
|
||||
def get_encoded_writer (out=sys.stdout, encoding=None, errors='replace'):
|
||||
def get_encoded_writer(out=sys.stdout, encoding=None, errors='replace'):
|
||||
"""Get wrapped output writer with given encoding and error handling."""
|
||||
if encoding is None:
|
||||
encoding = default_encoding
|
||||
|
|
|
|||
|
|
@ -81,7 +81,7 @@ def get_response_headers():
|
|||
]
|
||||
|
||||
|
||||
def formvalue (form, key):
|
||||
def formvalue(form, key):
|
||||
"""Get value with given key from WSGI form."""
|
||||
field = form.get(key)
|
||||
if isinstance(field, list):
|
||||
|
|
@ -99,7 +99,7 @@ class ThreadsafeIO:
|
|||
self.closed = False
|
||||
|
||||
@synchronized(_lock)
|
||||
def write (self, data):
|
||||
def write(self, data):
|
||||
"""Write given unicode data to buffer."""
|
||||
assert isinstance(data, str_text)
|
||||
if self.closed:
|
||||
|
|
@ -108,14 +108,14 @@ class ThreadsafeIO:
|
|||
self.buf.append(data)
|
||||
|
||||
@synchronized(_lock)
|
||||
def get_data (self):
|
||||
def get_data(self):
|
||||
"""Get bufferd unicode data."""
|
||||
data = "".join(self.buf)
|
||||
self.buf = []
|
||||
return data
|
||||
|
||||
@synchronized(_lock)
|
||||
def close (self):
|
||||
def close(self):
|
||||
"""Reset buffer and close this I/O object."""
|
||||
self.buf = []
|
||||
self.closed = True
|
||||
|
|
@ -126,7 +126,7 @@ def encode(s):
|
|||
return s.encode(HTML_ENCODING, 'ignore')
|
||||
|
||||
|
||||
def checklink (form=None, env=os.environ):
|
||||
def checklink(form=None, env=os.environ):
|
||||
"""Validates the CGI form and checks the given links."""
|
||||
if form is None:
|
||||
form = {}
|
||||
|
|
@ -147,7 +147,7 @@ def checklink (form=None, env=os.environ):
|
|||
out.close()
|
||||
|
||||
|
||||
def start_check (aggregate, out):
|
||||
def start_check(aggregate, out):
|
||||
"""Start checking in background and write encoded output to out."""
|
||||
# check in background
|
||||
t = threading.Thread(target=director.check_urls, args=(aggregate,))
|
||||
|
|
@ -183,12 +183,12 @@ def get_configuration(form, out):
|
|||
return config
|
||||
|
||||
|
||||
def get_host_name (form):
|
||||
def get_host_name(form):
|
||||
"""Return host name of given URL."""
|
||||
return urllib.parse.urlparse(formvalue(form, "url"))[1]
|
||||
|
||||
|
||||
def checkform (form, env):
|
||||
def checkform(form, env):
|
||||
"""Check form data. throw exception on error
|
||||
Be sure to NOT print out any user-given data as HTML code, so use
|
||||
only plain strings as exception text."""
|
||||
|
|
@ -227,13 +227,13 @@ def checkform (form, env):
|
|||
raise LCFormError(_("invalid %s option %r") % (option, value))
|
||||
|
||||
|
||||
def log (env, msg):
|
||||
def log(env, msg):
|
||||
"""Log message to WSGI error output."""
|
||||
logfile = env['wsgi.errors']
|
||||
logfile.write("%s\n" % msg)
|
||||
|
||||
|
||||
def dump (env, form):
|
||||
def dump(env, form):
|
||||
"""Log environment and form."""
|
||||
for var, value in env.items():
|
||||
log(env, var+"="+value)
|
||||
|
|
@ -241,7 +241,7 @@ def dump (env, form):
|
|||
log(env, str(formvalue(form, key)))
|
||||
|
||||
|
||||
def format_error (why):
|
||||
def format_error(why):
|
||||
"""Format standard error page.
|
||||
@param why: error message
|
||||
@ptype why: unicode
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ import imp
|
|||
from .fileutil import is_writable_by_others
|
||||
|
||||
|
||||
def is_frozen ():
|
||||
def is_frozen():
|
||||
"""Return True if running inside a py2exe- or py2app-generated
|
||||
executable."""
|
||||
return hasattr(sys, "frozen")
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ Locking utility class.
|
|||
import threading
|
||||
from . import log, LOG_THREAD
|
||||
|
||||
def get_lock (name, debug=False):
|
||||
def get_lock(name, debug=False):
|
||||
"""Get a new lock.
|
||||
@param debug: if True, acquire() and release() will have debug messages
|
||||
@ptype debug: boolean, default is False
|
||||
|
|
@ -36,19 +36,19 @@ def get_lock (name, debug=False):
|
|||
class DebugLock:
|
||||
"""Debugging lock class."""
|
||||
|
||||
def __init__ (self, lock, name):
|
||||
def __init__(self, lock, name):
|
||||
"""Store lock and name parameters."""
|
||||
self.lock = lock
|
||||
self.name = name
|
||||
|
||||
def acquire (self, blocking=1):
|
||||
def acquire(self, blocking=1):
|
||||
"""Acquire lock."""
|
||||
threadname = threading.currentThread().getName()
|
||||
log.debug(LOG_THREAD, "Acquire %s for %s", self.name, threadname)
|
||||
self.lock.acquire(blocking)
|
||||
log.debug(LOG_THREAD, "...acquired %s for %s", self.name, threadname)
|
||||
|
||||
def release (self):
|
||||
def release(self):
|
||||
"""Release lock."""
|
||||
threadname = threading.currentThread().getName()
|
||||
log.debug(LOG_THREAD, "Release %s for %s", self.name, threadname)
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ import traceback
|
|||
#gc.set_debug(gc.DEBUG_LEAK)
|
||||
|
||||
PRINT_LOCALVARS = False
|
||||
def _stack_format (stack):
|
||||
def _stack_format(stack):
|
||||
"""Format a stack trace to a message.
|
||||
|
||||
@return: formatted stack message
|
||||
|
|
@ -54,7 +54,7 @@ def _stack_format (stack):
|
|||
return s.getvalue()
|
||||
|
||||
|
||||
def _log (fun, msg, args, **kwargs):
|
||||
def _log(fun, msg, args, **kwargs):
|
||||
"""Log a message with given function. Optional the following keyword
|
||||
arguments are supported:
|
||||
traceback(bool) - if True print traceback of current function
|
||||
|
|
@ -70,7 +70,7 @@ def _log (fun, msg, args, **kwargs):
|
|||
fun(traceback.format_exc())
|
||||
|
||||
|
||||
def debug (logname, msg, *args, **kwargs):
|
||||
def debug(logname, msg, *args, **kwargs):
|
||||
"""Log a debug message.
|
||||
|
||||
return: None
|
||||
|
|
@ -80,7 +80,7 @@ def debug (logname, msg, *args, **kwargs):
|
|||
_log(log.debug, msg, args, **kwargs)
|
||||
|
||||
|
||||
def info (logname, msg, *args, **kwargs):
|
||||
def info(logname, msg, *args, **kwargs):
|
||||
"""Log an informational message.
|
||||
|
||||
return: None
|
||||
|
|
@ -90,7 +90,7 @@ def info (logname, msg, *args, **kwargs):
|
|||
_log(log.info, msg, args, **kwargs)
|
||||
|
||||
|
||||
def warn (logname, msg, *args, **kwargs):
|
||||
def warn(logname, msg, *args, **kwargs):
|
||||
"""Log a warning.
|
||||
|
||||
return: None
|
||||
|
|
@ -100,7 +100,7 @@ def warn (logname, msg, *args, **kwargs):
|
|||
_log(log.warning, msg, args, **kwargs)
|
||||
|
||||
|
||||
def error (logname, msg, *args, **kwargs):
|
||||
def error(logname, msg, *args, **kwargs):
|
||||
"""Log an error.
|
||||
|
||||
return: None
|
||||
|
|
@ -110,7 +110,7 @@ def error (logname, msg, *args, **kwargs):
|
|||
_log(log.error, msg, args, **kwargs)
|
||||
|
||||
|
||||
def critical (logname, msg, *args, **kwargs):
|
||||
def critical(logname, msg, *args, **kwargs):
|
||||
"""Log a critical error.
|
||||
|
||||
return: None
|
||||
|
|
@ -120,7 +120,7 @@ def critical (logname, msg, *args, **kwargs):
|
|||
_log(log.critical, msg, args, **kwargs)
|
||||
|
||||
|
||||
def exception (logname, msg, *args, **kwargs):
|
||||
def exception(logname, msg, *args, **kwargs):
|
||||
"""Log an exception.
|
||||
|
||||
return: None
|
||||
|
|
@ -130,11 +130,11 @@ def exception (logname, msg, *args, **kwargs):
|
|||
_log(log.exception, msg, args, **kwargs)
|
||||
|
||||
|
||||
def is_debug (logname):
|
||||
def is_debug(logname):
|
||||
"""See if logger is on debug level."""
|
||||
return logging.getLogger(logname).isEnabledFor(logging.DEBUG)
|
||||
|
||||
|
||||
def shutdown ():
|
||||
def shutdown():
|
||||
"""Flush and close all log handlers."""
|
||||
logging.shutdown()
|
||||
|
|
|
|||
|
|
@ -63,7 +63,7 @@ def init_log_config(handler=None):
|
|||
add_loghandler(handler)
|
||||
|
||||
|
||||
def add_loghandler (handler):
|
||||
def add_loghandler(handler):
|
||||
"""Add log handler to root logger and LOG_ROOT and set formatting."""
|
||||
format = "%(levelname)s %(name)s %(asctime)s %(threadName)s %(message)s"
|
||||
handler.setFormatter(logging.Formatter(format))
|
||||
|
|
@ -71,7 +71,7 @@ def add_loghandler (handler):
|
|||
logging.getLogger().addHandler(handler)
|
||||
|
||||
|
||||
def remove_loghandler (handler):
|
||||
def remove_loghandler(handler):
|
||||
"""Remove log handler from root logger and LOG_ROOT."""
|
||||
logging.getLogger(LOG_ROOT).removeHandler(handler)
|
||||
logging.getLogger().removeHandler(handler)
|
||||
|
|
|
|||
|
|
@ -64,11 +64,11 @@ class LogStatistics:
|
|||
- URL lengths
|
||||
"""
|
||||
|
||||
def __init__ (self):
|
||||
def __init__(self):
|
||||
"""Initialize log statistics."""
|
||||
self.reset()
|
||||
|
||||
def reset (self):
|
||||
def reset(self):
|
||||
"""Reset all log statistics to default values."""
|
||||
# number of logged URLs
|
||||
self.number = 0
|
||||
|
|
@ -92,7 +92,7 @@ class LogStatistics:
|
|||
# overall downloaded bytes
|
||||
self.downloaded_bytes = None
|
||||
|
||||
def log_url (self, url_data, do_print):
|
||||
def log_url(self, url_data, do_print):
|
||||
"""Log URL statistics."""
|
||||
self.number += 1
|
||||
if not url_data.valid:
|
||||
|
|
@ -124,12 +124,12 @@ class LogStatistics:
|
|||
# calculate running average
|
||||
self.avg_url_length += (l - self.avg_url_length) / self.avg_number
|
||||
|
||||
def log_internal_error (self):
|
||||
def log_internal_error(self):
|
||||
"""Increase internal error count."""
|
||||
self.internal_errors += 1
|
||||
|
||||
|
||||
class _Logger (abc.ABC):
|
||||
class _Logger(abc.ABC):
|
||||
"""
|
||||
Base class for logging of checked urls. It defines the public API
|
||||
(see below) and offers basic functionality for all loggers.
|
||||
|
|
@ -164,7 +164,7 @@ class _Logger (abc.ABC):
|
|||
# Default log configuration
|
||||
LoggerArgs = {}
|
||||
|
||||
def __init__ (self, **args):
|
||||
def __init__(self, **args):
|
||||
"""
|
||||
Initialize a logger, looking for part restrictions in kwargs.
|
||||
"""
|
||||
|
|
@ -198,18 +198,18 @@ class _Logger (abc.ABC):
|
|||
args.update(kwargs)
|
||||
return args
|
||||
|
||||
def get_charset_encoding (self):
|
||||
def get_charset_encoding(self):
|
||||
"""Translate the output encoding to a charset encoding name."""
|
||||
if self.output_encoding == "utf-8-sig":
|
||||
return "utf-8"
|
||||
return self.output_encoding
|
||||
|
||||
def encode (self, s):
|
||||
def encode(self, s):
|
||||
"""Encode string with output encoding."""
|
||||
assert isinstance(s, str_text)
|
||||
return s.encode(self.output_encoding, self.codec_errors)
|
||||
|
||||
def init_fileoutput (self, args):
|
||||
def init_fileoutput(self, args):
|
||||
"""
|
||||
Initialize self.fd file descriptor from args. For file output
|
||||
(used when the fileoutput arg is given), the self.fd
|
||||
|
|
@ -226,7 +226,7 @@ class _Logger (abc.ABC):
|
|||
else:
|
||||
self.fd = self.create_fd()
|
||||
|
||||
def start_fileoutput (self):
|
||||
def start_fileoutput(self):
|
||||
"""Start output to configured file."""
|
||||
path = os.path.dirname(self.filename)
|
||||
try:
|
||||
|
|
@ -243,7 +243,7 @@ class _Logger (abc.ABC):
|
|||
self.is_active = False
|
||||
self.filename = None
|
||||
|
||||
def create_fd (self):
|
||||
def create_fd(self):
|
||||
"""Create open file descriptor."""
|
||||
if self.filename is None:
|
||||
return i18n.get_encoded_writer(encoding=self.output_encoding,
|
||||
|
|
@ -251,7 +251,7 @@ class _Logger (abc.ABC):
|
|||
return codecs.open(self.filename, "wb", self.output_encoding,
|
||||
self.codec_errors)
|
||||
|
||||
def close_fileoutput (self):
|
||||
def close_fileoutput(self):
|
||||
"""
|
||||
Flush and close the file output denoted by self.fd.
|
||||
"""
|
||||
|
|
@ -269,7 +269,7 @@ class _Logger (abc.ABC):
|
|||
pass
|
||||
self.fd = None
|
||||
|
||||
def check_date (self):
|
||||
def check_date(self):
|
||||
"""
|
||||
Check for special dates.
|
||||
"""
|
||||
|
|
@ -278,14 +278,14 @@ class _Logger (abc.ABC):
|
|||
msg = _("Happy birthday for LinkChecker, I'm %d years old today!")
|
||||
self.comment(msg % (now.year - 2000))
|
||||
|
||||
def comment (self, s, **args):
|
||||
def comment(self, s, **args):
|
||||
"""
|
||||
Write a comment and a newline. This method just prints
|
||||
the given string.
|
||||
"""
|
||||
self.writeln(s=s, **args)
|
||||
|
||||
def wrap (self, lines, width):
|
||||
def wrap(self, lines, width):
|
||||
"""
|
||||
Return wrapped version of given lines.
|
||||
"""
|
||||
|
|
@ -297,7 +297,7 @@ class _Logger (abc.ABC):
|
|||
break_on_hyphens=False)
|
||||
return strformat.wrap(text, width, **kwargs).lstrip()
|
||||
|
||||
def write (self, s, **args):
|
||||
def write(self, s, **args):
|
||||
"""Write string to output descriptor. Strips control characters
|
||||
from string before writing.
|
||||
"""
|
||||
|
|
@ -318,13 +318,13 @@ class _Logger (abc.ABC):
|
|||
self.fd = dummy.Dummy()
|
||||
self.is_active = False
|
||||
|
||||
def writeln (self, s="", **args):
|
||||
def writeln(self, s="", **args):
|
||||
"""
|
||||
Write string to output descriptor plus a newline.
|
||||
"""
|
||||
self.write("%s%s" % (s, os.linesep), **args)
|
||||
|
||||
def has_part (self, name):
|
||||
def has_part(self, name):
|
||||
"""
|
||||
See if given part name will be logged.
|
||||
"""
|
||||
|
|
@ -333,19 +333,19 @@ class _Logger (abc.ABC):
|
|||
return True
|
||||
return name in self.logparts
|
||||
|
||||
def part (self, name):
|
||||
def part(self, name):
|
||||
"""
|
||||
Return translated part name.
|
||||
"""
|
||||
return _(Fields.get(name, ""))
|
||||
|
||||
def spaces (self, name):
|
||||
def spaces(self, name):
|
||||
"""
|
||||
Return indent of spaces for given part name.
|
||||
"""
|
||||
return self.logspaces[name]
|
||||
|
||||
def start_output (self):
|
||||
def start_output(self):
|
||||
"""
|
||||
Start log output.
|
||||
"""
|
||||
|
|
@ -363,7 +363,7 @@ class _Logger (abc.ABC):
|
|||
self.stats.reset()
|
||||
self.starttime = time.time()
|
||||
|
||||
def log_filter_url (self, url_data, do_print):
|
||||
def log_filter_url(self, url_data, do_print):
|
||||
"""
|
||||
Log a new url with this logger if do_print is True. Else
|
||||
only update accounting data.
|
||||
|
|
@ -372,7 +372,7 @@ class _Logger (abc.ABC):
|
|||
if do_print:
|
||||
self.log_url(url_data)
|
||||
|
||||
def write_intro (self):
|
||||
def write_intro(self):
|
||||
"""Write intro comments."""
|
||||
self.comment(_("created by %(app)s at %(time)s") %
|
||||
{"app": configuration.AppName,
|
||||
|
|
@ -383,7 +383,7 @@ class _Logger (abc.ABC):
|
|||
{'url': configuration.SupportUrl})
|
||||
self.check_date()
|
||||
|
||||
def write_outro (self):
|
||||
def write_outro(self):
|
||||
"""Write outro comments."""
|
||||
self.stoptime = time.time()
|
||||
duration = self.stoptime - self.starttime
|
||||
|
|
@ -392,32 +392,32 @@ class _Logger (abc.ABC):
|
|||
"duration": strformat.strduration_long(duration)})
|
||||
|
||||
@abc.abstractmethod
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""
|
||||
Log a new url with this logger.
|
||||
"""
|
||||
pass
|
||||
|
||||
@abc.abstractmethod
|
||||
def end_output (self, **kwargs):
|
||||
def end_output(self, **kwargs):
|
||||
"""
|
||||
End of output, used for cleanup (eg output buffer flushing).
|
||||
"""
|
||||
pass
|
||||
|
||||
def __str__ (self):
|
||||
def __str__(self):
|
||||
"""
|
||||
Return class name.
|
||||
"""
|
||||
return self.__class__.__name__
|
||||
|
||||
def __repr__ (self):
|
||||
def __repr__(self):
|
||||
"""
|
||||
Return class name.
|
||||
"""
|
||||
return repr(self.__class__.__name__)
|
||||
|
||||
def flush (self):
|
||||
def flush(self):
|
||||
"""
|
||||
If the logger has internal buffers, flush them.
|
||||
Ignore flush I/O errors since we are not responsible for proper
|
||||
|
|
@ -429,7 +429,7 @@ class _Logger (abc.ABC):
|
|||
except (IOError, AttributeError):
|
||||
pass
|
||||
|
||||
def log_internal_error (self):
|
||||
def log_internal_error(self):
|
||||
"""Indicate that an internal error occurred in the program."""
|
||||
log.warn(LOG_CHECK, "internal error occurred")
|
||||
self.stats.log_internal_error()
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ from linkcheck.configuration import get_user_data
|
|||
from . import _Logger
|
||||
|
||||
|
||||
class BlacklistLogger (_Logger):
|
||||
class BlacklistLogger(_Logger):
|
||||
"""
|
||||
Updates a blacklist of wrong links. If a link on the blacklist
|
||||
is working (again), it is removed from the list. So after n days
|
||||
|
|
@ -36,7 +36,7 @@ class BlacklistLogger (_Logger):
|
|||
"filename": os.path.join(get_user_data(), "blacklist"),
|
||||
}
|
||||
|
||||
def __init__ (self, **kwargs):
|
||||
def __init__(self, **kwargs):
|
||||
"""Intialize with old blacklist data (if found, else not)."""
|
||||
args = self.get_args(kwargs)
|
||||
super(BlacklistLogger, self).__init__(**args)
|
||||
|
|
@ -45,13 +45,13 @@ class BlacklistLogger (_Logger):
|
|||
if self.filename is not None and os.path.exists(self.filename):
|
||||
self.read_blacklist()
|
||||
|
||||
def comment (self, s, **args):
|
||||
def comment(self, s, **args):
|
||||
"""
|
||||
Write nothing.
|
||||
"""
|
||||
pass
|
||||
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""
|
||||
Put invalid url in blacklist, delete valid url from blacklist.
|
||||
"""
|
||||
|
|
@ -66,13 +66,13 @@ class BlacklistLogger (_Logger):
|
|||
if not url_data.valid:
|
||||
self.blacklist[key] = 1
|
||||
|
||||
def end_output (self, **kwargs):
|
||||
def end_output(self, **kwargs):
|
||||
"""
|
||||
Write blacklist file.
|
||||
"""
|
||||
self.write_blacklist()
|
||||
|
||||
def read_blacklist (self):
|
||||
def read_blacklist(self):
|
||||
"""
|
||||
Read a previously stored blacklist from file fd.
|
||||
"""
|
||||
|
|
@ -85,7 +85,7 @@ class BlacklistLogger (_Logger):
|
|||
value, key = line.split(None, 1)
|
||||
self.blacklist[key] = int(value)
|
||||
|
||||
def write_blacklist (self):
|
||||
def write_blacklist(self):
|
||||
"""
|
||||
Write the blacklist.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -30,7 +30,7 @@ Columns = (
|
|||
)
|
||||
|
||||
|
||||
class CSVLogger (_Logger):
|
||||
class CSVLogger(_Logger):
|
||||
"""
|
||||
CSV output, consisting of one line per entry. Entries are
|
||||
separated by a separator (a semicolon per default).
|
||||
|
|
@ -45,7 +45,7 @@ class CSVLogger (_Logger):
|
|||
"dialect": "excel",
|
||||
}
|
||||
|
||||
def __init__ (self, **kwargs):
|
||||
def __init__(self, **kwargs):
|
||||
"""Store default separator and (os dependent) line terminator."""
|
||||
args = self.get_args(kwargs)
|
||||
super(CSVLogger, self).__init__(**args)
|
||||
|
|
@ -55,11 +55,11 @@ class CSVLogger (_Logger):
|
|||
self.dialect = args['dialect']
|
||||
self.linesep = os.linesep
|
||||
|
||||
def comment (self, s, **args):
|
||||
def comment(self, s, **args):
|
||||
"""Write CSV comment."""
|
||||
self.writeln(s="# %s" % s, **args)
|
||||
|
||||
def start_output (self):
|
||||
def start_output(self):
|
||||
"""Write checking start info as csv comment."""
|
||||
super(CSVLogger, self).start_output()
|
||||
row = []
|
||||
|
|
@ -79,7 +79,7 @@ class CSVLogger (_Logger):
|
|||
if row:
|
||||
self.writerow(row)
|
||||
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""Write csv formatted url check info."""
|
||||
row = []
|
||||
if self.has_part("urlname"):
|
||||
|
|
@ -119,7 +119,7 @@ class CSVLogger (_Logger):
|
|||
self.writerow(map(strformat.unicode_safe, row))
|
||||
self.flush()
|
||||
|
||||
def writerow (self, row):
|
||||
def writerow(self, row):
|
||||
"""Write one row in CSV format."""
|
||||
self.writer.writerow(row)
|
||||
# Fetch UTF-8 output from the queue ...
|
||||
|
|
@ -134,7 +134,7 @@ class CSVLogger (_Logger):
|
|||
self.queue.seek(0)
|
||||
self.queue.truncate(0)
|
||||
|
||||
def end_output (self, **kwargs):
|
||||
def end_output(self, **kwargs):
|
||||
"""Write end of checking info as csv comment."""
|
||||
if self.has_part("outro"):
|
||||
self.write_outro()
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ from .. import strformat
|
|||
from builtins import str as str_text
|
||||
|
||||
|
||||
class CustomXMLLogger (xmllog._XMLLogger):
|
||||
class CustomXMLLogger(xmllog._XMLLogger):
|
||||
"""
|
||||
XML custom output for easy post-processing.
|
||||
"""
|
||||
|
|
@ -32,7 +32,7 @@ class CustomXMLLogger (xmllog._XMLLogger):
|
|||
"filename": "linkchecker-out.xml",
|
||||
}
|
||||
|
||||
def start_output (self):
|
||||
def start_output(self):
|
||||
"""
|
||||
Write start of checking info as xml comment.
|
||||
"""
|
||||
|
|
@ -42,7 +42,7 @@ class CustomXMLLogger (xmllog._XMLLogger):
|
|||
self.xml_starttag('linkchecker', attrs)
|
||||
self.flush()
|
||||
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""
|
||||
Log URL data in custom XML format.
|
||||
"""
|
||||
|
|
@ -95,7 +95,7 @@ class CustomXMLLogger (xmllog._XMLLogger):
|
|||
self.xml_endtag('urldata')
|
||||
self.flush()
|
||||
|
||||
def end_output (self, **kwargs):
|
||||
def end_output(self, **kwargs):
|
||||
"""
|
||||
Write XML end tag.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -20,7 +20,7 @@ http://www.graphviz.org/doc/info/lang.html
|
|||
from .graph import _GraphLogger
|
||||
|
||||
|
||||
class DOTLogger (_GraphLogger):
|
||||
class DOTLogger(_GraphLogger):
|
||||
"""
|
||||
Generates .dot sitemap graphs. Use graphviz to see the sitemap graph.
|
||||
"""
|
||||
|
|
@ -32,7 +32,7 @@ class DOTLogger (_GraphLogger):
|
|||
"encoding": "ascii",
|
||||
}
|
||||
|
||||
def start_output (self):
|
||||
def start_output(self):
|
||||
"""Write start of checking info as DOT comment."""
|
||||
super(DOTLogger, self).start_output()
|
||||
if self.has_part("intro"):
|
||||
|
|
@ -44,12 +44,12 @@ class DOTLogger (_GraphLogger):
|
|||
self.writeln(" ];")
|
||||
self.flush()
|
||||
|
||||
def comment (self, s, **args):
|
||||
def comment(self, s, **args):
|
||||
"""Write DOT comment."""
|
||||
self.write("// ")
|
||||
self.writeln(s=s, **args)
|
||||
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""Write one node."""
|
||||
node = self.get_node(url_data)
|
||||
if node is not None:
|
||||
|
|
@ -66,7 +66,7 @@ class DOTLogger (_GraphLogger):
|
|||
self.writeln(" extern=%d," % node["extern"])
|
||||
self.writeln(" ];")
|
||||
|
||||
def write_edge (self, node):
|
||||
def write_edge(self, node):
|
||||
"""Write edge from parent to node."""
|
||||
source = dotquote(self.nodes[node["parent_url"]]["label"])
|
||||
target = dotquote(node["label"])
|
||||
|
|
@ -76,11 +76,11 @@ class DOTLogger (_GraphLogger):
|
|||
self.writeln(" valid=%d," % node["valid"])
|
||||
self.writeln(" ];")
|
||||
|
||||
def end_graph (self):
|
||||
def end_graph(self):
|
||||
"""Write end of graph marker."""
|
||||
self.writeln("}")
|
||||
|
||||
|
||||
def dotquote (s):
|
||||
def dotquote(s):
|
||||
"""Quote string for usage in DOT output format."""
|
||||
return s.replace('"', '\\"')
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ A gml logger.
|
|||
from .graph import _GraphLogger
|
||||
|
||||
|
||||
class GMLLogger (_GraphLogger):
|
||||
class GMLLogger(_GraphLogger):
|
||||
"""GML means Graph Modeling Language. Use a GML tool to see
|
||||
the sitemap graph."""
|
||||
|
||||
|
|
@ -29,7 +29,7 @@ class GMLLogger (_GraphLogger):
|
|||
"filename": "linkchecker-out.gml",
|
||||
}
|
||||
|
||||
def start_output (self):
|
||||
def start_output(self):
|
||||
"""Write start of checking info as gml comment."""
|
||||
super(GMLLogger, self).start_output()
|
||||
if self.has_part("intro"):
|
||||
|
|
@ -39,11 +39,11 @@ class GMLLogger (_GraphLogger):
|
|||
self.writeln(" directed 1")
|
||||
self.flush()
|
||||
|
||||
def comment (self, s, **args):
|
||||
def comment(self, s, **args):
|
||||
"""Write GML comment."""
|
||||
self.writeln(s='comment "%s"' % s, **args)
|
||||
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""Write one node."""
|
||||
node = self.get_node(url_data)
|
||||
if node:
|
||||
|
|
@ -62,7 +62,7 @@ class GMLLogger (_GraphLogger):
|
|||
self.writeln(" extern %d" % node["extern"])
|
||||
self.writeln(" ]")
|
||||
|
||||
def write_edge (self, node):
|
||||
def write_edge(self, node):
|
||||
"""Write one edge."""
|
||||
self.writeln(" edge [")
|
||||
self.writeln(' label "%s"' % node["edge"])
|
||||
|
|
@ -72,6 +72,6 @@ class GMLLogger (_GraphLogger):
|
|||
self.writeln(" valid %d" % node["valid"])
|
||||
self.writeln(" ]")
|
||||
|
||||
def end_graph (self):
|
||||
def end_graph(self):
|
||||
"""Write end of graph marker."""
|
||||
self.writeln("]")
|
||||
|
|
|
|||
|
|
@ -21,10 +21,10 @@ from ..decorators import notimplemented
|
|||
import re
|
||||
|
||||
|
||||
class _GraphLogger (_Logger):
|
||||
class _GraphLogger(_Logger):
|
||||
"""Provide base method to get node data."""
|
||||
|
||||
def __init__ (self, **kwargs):
|
||||
def __init__(self, **kwargs):
|
||||
"""Initialize graph node list and internal id counter."""
|
||||
args = self.get_args(kwargs)
|
||||
super(_GraphLogger, self).__init__(**args)
|
||||
|
|
@ -41,7 +41,7 @@ class _GraphLogger (_Logger):
|
|||
if url_data.valid:
|
||||
self.log_url(url_data)
|
||||
|
||||
def get_node (self, url_data):
|
||||
def get_node(self, url_data):
|
||||
"""Return new node data or None if node already exists."""
|
||||
if not url_data.url:
|
||||
return None
|
||||
|
|
@ -63,7 +63,7 @@ class _GraphLogger (_Logger):
|
|||
self.nodeid += 1
|
||||
return node
|
||||
|
||||
def write_edges (self):
|
||||
def write_edges(self):
|
||||
"""
|
||||
Write all edges we can find in the graph in a brute-force manner.
|
||||
"""
|
||||
|
|
@ -73,16 +73,16 @@ class _GraphLogger (_Logger):
|
|||
self.flush()
|
||||
|
||||
@notimplemented
|
||||
def write_edge (self, node):
|
||||
def write_edge(self, node):
|
||||
"""Write edge data for one node and its parent."""
|
||||
pass
|
||||
|
||||
@notimplemented
|
||||
def end_graph (self):
|
||||
def end_graph(self):
|
||||
"""Write end-of-graph marker."""
|
||||
pass
|
||||
|
||||
def end_output (self, **kwargs):
|
||||
def end_output(self, **kwargs):
|
||||
"""Write edges and end of checking info as gml comment."""
|
||||
self.write_edges()
|
||||
self.end_graph()
|
||||
|
|
@ -93,7 +93,7 @@ class _GraphLogger (_Logger):
|
|||
|
||||
_disallowed = re.compile(r"[^a-zA-Z0-9 '#(){}\-\[\]\.,;:\!\?]+")
|
||||
|
||||
def quote (s):
|
||||
def quote(s):
|
||||
"""Replace disallowed characters in node or edge labels.
|
||||
Also remove whitespace from beginning or end of label."""
|
||||
return _disallowed.sub(" ", s).strip()
|
||||
|
|
|
|||
|
|
@ -21,7 +21,7 @@ from .xmllog import _XMLLogger
|
|||
from .graph import _GraphLogger
|
||||
|
||||
|
||||
class GraphXMLLogger (_XMLLogger, _GraphLogger):
|
||||
class GraphXMLLogger(_XMLLogger, _GraphLogger):
|
||||
"""XML output mirroring the GML structure. Easy to parse with any XML
|
||||
tool."""
|
||||
|
||||
|
|
@ -31,14 +31,14 @@ class GraphXMLLogger (_XMLLogger, _GraphLogger):
|
|||
"filename": "linkchecker-out.gxml",
|
||||
}
|
||||
|
||||
def __init__ (self, **kwargs):
|
||||
def __init__(self, **kwargs):
|
||||
"""Initialize graph node list and internal id counter."""
|
||||
args = self.get_args(kwargs)
|
||||
super(GraphXMLLogger, self).__init__(**args)
|
||||
self.nodes = {}
|
||||
self.nodeid = 0
|
||||
|
||||
def start_output (self):
|
||||
def start_output(self):
|
||||
"""Write start of checking info as xml comment."""
|
||||
super(GraphXMLLogger, self).start_output()
|
||||
self.xml_start_output()
|
||||
|
|
@ -46,7 +46,7 @@ class GraphXMLLogger (_XMLLogger, _GraphLogger):
|
|||
self.xml_starttag('graph', attrs={"isDirected": "true"})
|
||||
self.flush()
|
||||
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""Write one node and all possible edges."""
|
||||
node = self.get_node(url_data)
|
||||
if node:
|
||||
|
|
@ -66,7 +66,7 @@ class GraphXMLLogger (_XMLLogger, _GraphLogger):
|
|||
self.xml_endtag("data")
|
||||
self.xml_endtag("node")
|
||||
|
||||
def write_edge (self, node):
|
||||
def write_edge(self, node):
|
||||
"""Write one edge."""
|
||||
attrs = {
|
||||
"source": "%d" % self.nodes[node["parent_url"]]["id"],
|
||||
|
|
@ -80,7 +80,7 @@ class GraphXMLLogger (_XMLLogger, _GraphLogger):
|
|||
self.xml_endtag("data")
|
||||
self.xml_endtag("edge")
|
||||
|
||||
def end_output (self, **kwargs):
|
||||
def end_output(self, **kwargs):
|
||||
"""Finish graph output, and print end of checking info as xml
|
||||
comment."""
|
||||
self.xml_endtag("graph")
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ HTML_HEADER = """<!DOCTYPE HTML>
|
|||
"""
|
||||
|
||||
|
||||
class HtmlLogger (_Logger):
|
||||
class HtmlLogger(_Logger):
|
||||
"""Logger with HTML output."""
|
||||
|
||||
LoggerName = 'html'
|
||||
|
|
@ -75,7 +75,7 @@ class HtmlLogger (_Logger):
|
|||
'colorok': '#3ba557',
|
||||
}
|
||||
|
||||
def __init__ (self, **kwargs):
|
||||
def __init__(self, **kwargs):
|
||||
"""Initialize default HTML color values."""
|
||||
args = self.get_args(kwargs)
|
||||
super(HtmlLogger, self).__init__(**args)
|
||||
|
|
@ -88,17 +88,17 @@ class HtmlLogger (_Logger):
|
|||
self.colorerror = args['colorerror']
|
||||
self.colorok = args['colorok']
|
||||
|
||||
def part (self, name):
|
||||
def part(self, name):
|
||||
"""Return non-space-breakable part name."""
|
||||
return super(HtmlLogger, self).part(name).replace(" ", " ")
|
||||
|
||||
def comment (self, s, **args):
|
||||
def comment(self, s, **args):
|
||||
"""Write HTML comment."""
|
||||
self.write("<!-- ")
|
||||
self.write(s, **args)
|
||||
self.write(" -->")
|
||||
|
||||
def start_output (self):
|
||||
def start_output(self):
|
||||
"""Write start of checking info."""
|
||||
super(HtmlLogger, self).start_output()
|
||||
header = {
|
||||
|
|
@ -125,7 +125,7 @@ class HtmlLogger (_Logger):
|
|||
self.check_date()
|
||||
self.flush()
|
||||
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""Write url checking info as HTML."""
|
||||
self.write_table_start()
|
||||
if self.has_part("url"):
|
||||
|
|
@ -155,21 +155,21 @@ class HtmlLogger (_Logger):
|
|||
self.write_table_end()
|
||||
self.flush()
|
||||
|
||||
def write_table_start (self):
|
||||
def write_table_start(self):
|
||||
"""Start html table."""
|
||||
self.writeln('<br/><br/><table>')
|
||||
|
||||
def write_table_end (self):
|
||||
def write_table_end(self):
|
||||
"""End html table."""
|
||||
self.write('</table><br/>')
|
||||
|
||||
def write_id (self):
|
||||
def write_id(self):
|
||||
"""Write ID for current URL."""
|
||||
self.writeln("<tr>")
|
||||
self.writeln('<td>%s</td>' % self.part("id"))
|
||||
self.write("<td>%d</td></tr>" % self.stats.number)
|
||||
|
||||
def write_url (self, url_data):
|
||||
def write_url(self, url_data):
|
||||
"""Write url_data.base_url."""
|
||||
self.writeln("<tr>")
|
||||
self.writeln('<td class="url">%s</td>' % self.part("url"))
|
||||
|
|
@ -177,12 +177,12 @@ class HtmlLogger (_Logger):
|
|||
self.write("`%s'" % html.escape(url_data.base_url))
|
||||
self.writeln("</td></tr>")
|
||||
|
||||
def write_name (self, url_data):
|
||||
def write_name(self, url_data):
|
||||
"""Write url_data.name."""
|
||||
args = (self.part("name"), html.escape(url_data.name))
|
||||
self.writeln("<tr><td>%s</td><td>`%s'</td></tr>" % args)
|
||||
|
||||
def write_parent (self, url_data):
|
||||
def write_parent(self, url_data):
|
||||
"""Write url_data.parent_url."""
|
||||
self.write("<tr><td>"+self.part("parenturl")+
|
||||
'</td><td><a target="top" href="'+
|
||||
|
|
@ -203,35 +203,35 @@ class HtmlLogger (_Logger):
|
|||
self.write('(<a href="'+vcss+'">CSS</a>)')
|
||||
self.writeln("</td></tr>")
|
||||
|
||||
def write_base (self, url_data):
|
||||
def write_base(self, url_data):
|
||||
"""Write url_data.base_ref."""
|
||||
self.writeln("<tr><td>"+self.part("base")+"</td><td>"+
|
||||
html.escape(url_data.base_ref)+"</td></tr>")
|
||||
|
||||
def write_real (self, url_data):
|
||||
def write_real(self, url_data):
|
||||
"""Write url_data.url."""
|
||||
self.writeln("<tr><td>"+self.part("realurl")+"</td><td>"+
|
||||
'<a target="top" href="'+url_data.url+
|
||||
'">'+html.escape(url_data.url)+"</a></td></tr>")
|
||||
|
||||
def write_dltime (self, url_data):
|
||||
def write_dltime(self, url_data):
|
||||
"""Write url_data.dltime."""
|
||||
self.writeln("<tr><td>"+self.part("dltime")+"</td><td>"+
|
||||
(_("%.3f seconds") % url_data.dltime)+
|
||||
"</td></tr>")
|
||||
|
||||
def write_size (self, url_data):
|
||||
def write_size(self, url_data):
|
||||
"""Write url_data.size."""
|
||||
self.writeln("<tr><td>"+self.part("dlsize")+"</td><td>"+
|
||||
strformat.strsize(url_data.size)+
|
||||
"</td></tr>")
|
||||
|
||||
def write_checktime (self, url_data):
|
||||
def write_checktime(self, url_data):
|
||||
"""Write url_data.checktime."""
|
||||
self.writeln("<tr><td>"+self.part("checktime")+"</td><td>"+
|
||||
(_("%.3f seconds") % url_data.checktime)+"</td></tr>")
|
||||
|
||||
def write_info (self, url_data):
|
||||
def write_info(self, url_data):
|
||||
"""Write url_data.info."""
|
||||
sep = "<br/>"+os.linesep
|
||||
text = sep.join(html.escape(x) for x in url_data.info)
|
||||
|
|
@ -244,7 +244,7 @@ class HtmlLogger (_Logger):
|
|||
self.writeln('<tr><td valign="top">' + self.part("modified") +
|
||||
"</td><td>"+text+"</td></tr>")
|
||||
|
||||
def write_warning (self, url_data):
|
||||
def write_warning(self, url_data):
|
||||
"""Write url_data.warnings."""
|
||||
sep = "<br/>"+os.linesep
|
||||
text = sep.join(html.escape(x[1]) for x in url_data.warnings)
|
||||
|
|
@ -252,7 +252,7 @@ class HtmlLogger (_Logger):
|
|||
'valign="top">' + self.part("warning") +
|
||||
'</td><td class="warning">' + text + "</td></tr>")
|
||||
|
||||
def write_result (self, url_data):
|
||||
def write_result(self, url_data):
|
||||
"""Write url_data.result."""
|
||||
if url_data.valid:
|
||||
self.write('<tr><td class="valid">')
|
||||
|
|
@ -268,7 +268,7 @@ class HtmlLogger (_Logger):
|
|||
self.write(": "+html.escape(url_data.result))
|
||||
self.writeln("</td></tr>")
|
||||
|
||||
def write_stats (self):
|
||||
def write_stats(self):
|
||||
"""Write check statistic infos."""
|
||||
self.writeln('<br/><i>%s</i><br/>' % _("Statistics"))
|
||||
if self.stats.number > 0:
|
||||
|
|
@ -285,7 +285,7 @@ class HtmlLogger (_Logger):
|
|||
self.writeln(_("No statistics available since no URLs were checked."))
|
||||
self.writeln("<br/>")
|
||||
|
||||
def write_outro (self):
|
||||
def write_outro(self):
|
||||
"""Write end of check message."""
|
||||
self.writeln("<br/>")
|
||||
self.write(_("That's it.")+" ")
|
||||
|
|
@ -326,7 +326,7 @@ class HtmlLogger (_Logger):
|
|||
configuration.SupportUrl+"</a>.<br/>"))
|
||||
self.writeln("</small></body></html>")
|
||||
|
||||
def end_output (self, **kwargs):
|
||||
def end_output(self, **kwargs):
|
||||
"""Write end of checking info as HTML."""
|
||||
if self.has_part("stats"):
|
||||
self.write_stats()
|
||||
|
|
|
|||
|
|
@ -19,30 +19,30 @@ A dummy logger.
|
|||
from . import _Logger
|
||||
|
||||
|
||||
class NoneLogger (_Logger):
|
||||
class NoneLogger(_Logger):
|
||||
"""
|
||||
Dummy logger printing nothing.
|
||||
"""
|
||||
|
||||
LoggerName = 'none'
|
||||
|
||||
def comment (self, s, **args):
|
||||
def comment(self, s, **args):
|
||||
"""
|
||||
Do nothing.
|
||||
"""
|
||||
pass
|
||||
|
||||
def start_output (self):
|
||||
def start_output(self):
|
||||
"""
|
||||
Do nothing.
|
||||
"""
|
||||
pass
|
||||
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""Do nothing."""
|
||||
pass
|
||||
|
||||
def end_output (self, **kwargs):
|
||||
def end_output(self, **kwargs):
|
||||
"""
|
||||
Do nothing.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ ChangeFreqs = (
|
|||
HTTP_SCHEMES = ('http:', 'https:')
|
||||
HTML_TYPES = ('text/html', "application/xhtml+xml")
|
||||
|
||||
class SitemapXmlLogger (xmllog._XMLLogger):
|
||||
class SitemapXmlLogger(xmllog._XMLLogger):
|
||||
"""Sitemap XML output according to http://www.sitemaps.org/protocol.html
|
||||
"""
|
||||
|
||||
|
|
@ -43,7 +43,7 @@ class SitemapXmlLogger (xmllog._XMLLogger):
|
|||
"encoding": "utf-8",
|
||||
}
|
||||
|
||||
def __init__ (self, **kwargs):
|
||||
def __init__(self, **kwargs):
|
||||
"""Initialize graph node list and internal id counter."""
|
||||
args = self.get_args(kwargs)
|
||||
super(SitemapXmlLogger, self).__init__(**args)
|
||||
|
|
@ -63,7 +63,7 @@ class SitemapXmlLogger (xmllog._XMLLogger):
|
|||
if 'priority' in args:
|
||||
self.priority = float(args['priority'])
|
||||
|
||||
def start_output (self):
|
||||
def start_output(self):
|
||||
"""Write start of checking info as xml comment."""
|
||||
super(SitemapXmlLogger, self).start_output()
|
||||
self.xml_start_output()
|
||||
|
|
@ -101,7 +101,7 @@ class SitemapXmlLogger (xmllog._XMLLogger):
|
|||
and url_data.content_type in HTML_TYPES):
|
||||
self.log_url(url_data, priority=priority)
|
||||
|
||||
def log_url (self, url_data, priority=None):
|
||||
def log_url(self, url_data, priority=None):
|
||||
"""Log URL data in sitemap format."""
|
||||
self.xml_starttag('url')
|
||||
self.xml_tag('loc', url_data.url)
|
||||
|
|
@ -112,7 +112,7 @@ class SitemapXmlLogger (xmllog._XMLLogger):
|
|||
self.xml_endtag('url')
|
||||
self.flush()
|
||||
|
||||
def end_output (self, **kwargs):
|
||||
def end_output(self, **kwargs):
|
||||
"""Write XML end tag."""
|
||||
self.xml_endtag("urlset")
|
||||
self.xml_end_output()
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ from . import _Logger
|
|||
from .. import url as urlutil
|
||||
|
||||
|
||||
def sqlify (s):
|
||||
def sqlify(s):
|
||||
"""
|
||||
Escape special SQL chars and strings.
|
||||
"""
|
||||
|
|
@ -31,7 +31,7 @@ def sqlify (s):
|
|||
return "'%s'" % s.replace("'", "''").replace(os.linesep, r"\n")
|
||||
|
||||
|
||||
def intify (s):
|
||||
def intify(s):
|
||||
"""
|
||||
Coerce a truth value to 0/1.
|
||||
|
||||
|
|
@ -45,7 +45,7 @@ def intify (s):
|
|||
return 0
|
||||
|
||||
|
||||
class SQLLogger (_Logger):
|
||||
class SQLLogger(_Logger):
|
||||
"""
|
||||
SQL output, should work with any SQL database (not tested).
|
||||
"""
|
||||
|
|
@ -58,7 +58,7 @@ class SQLLogger (_Logger):
|
|||
'dbname': 'linksdb',
|
||||
}
|
||||
|
||||
def __init__ (self, **kwargs):
|
||||
def __init__(self, **kwargs):
|
||||
"""Initialize database access data."""
|
||||
args = self.get_args(kwargs)
|
||||
super(SQLLogger, self).__init__(**args)
|
||||
|
|
@ -66,14 +66,14 @@ class SQLLogger (_Logger):
|
|||
self.dbname = args['dbname']
|
||||
self.separator = args['separator']
|
||||
|
||||
def comment (self, s, **args):
|
||||
def comment(self, s, **args):
|
||||
"""
|
||||
Write SQL comment.
|
||||
"""
|
||||
self.write("-- ")
|
||||
self.writeln(s=s, **args)
|
||||
|
||||
def start_output (self):
|
||||
def start_output(self):
|
||||
"""
|
||||
Write start of checking info as sql comment.
|
||||
"""
|
||||
|
|
@ -83,7 +83,7 @@ class SQLLogger (_Logger):
|
|||
self.writeln()
|
||||
self.flush()
|
||||
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""
|
||||
Store url check info into the database.
|
||||
"""
|
||||
|
|
@ -130,7 +130,7 @@ class SQLLogger (_Logger):
|
|||
})
|
||||
self.flush()
|
||||
|
||||
def end_output (self, **kwargs):
|
||||
def end_output(self, **kwargs):
|
||||
"""
|
||||
Write end of checking info as sql comment.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ from .. import ansicolor, strformat, configuration, i18n
|
|||
from builtins import str as str_text
|
||||
|
||||
|
||||
class TextLogger (_Logger):
|
||||
class TextLogger(_Logger):
|
||||
"""
|
||||
A text logger, colorizing the output if possible.
|
||||
|
||||
|
|
@ -52,7 +52,7 @@ class TextLogger (_Logger):
|
|||
'colorreset': "default",
|
||||
}
|
||||
|
||||
def __init__ (self, **kwargs):
|
||||
def __init__(self, **kwargs):
|
||||
"""Initialize error counter and optional file output."""
|
||||
args = self.get_args(kwargs)
|
||||
super(TextLogger, self).__init__(**args)
|
||||
|
|
@ -71,27 +71,27 @@ class TextLogger (_Logger):
|
|||
self.colordlsize = args.get('colordlsize', 'default')
|
||||
self.colorreset = args.get('colorreset', 'default')
|
||||
|
||||
def init_fileoutput (self, args):
|
||||
def init_fileoutput(self, args):
|
||||
"""Colorize file output if possible."""
|
||||
super(TextLogger, self).init_fileoutput(args)
|
||||
if self.fd is not None:
|
||||
self.fd = ansicolor.Colorizer(self.fd)
|
||||
|
||||
def start_fileoutput (self):
|
||||
def start_fileoutput(self):
|
||||
"""Needed to make file descriptor color aware."""
|
||||
init_color = self.fd is None
|
||||
super(TextLogger, self).start_fileoutput()
|
||||
if init_color:
|
||||
self.fd = ansicolor.Colorizer(self.fd)
|
||||
|
||||
def start_output (self):
|
||||
def start_output(self):
|
||||
"""Write generic start checking info."""
|
||||
super(TextLogger, self).start_output()
|
||||
if self.has_part('intro'):
|
||||
self.write_intro()
|
||||
self.flush()
|
||||
|
||||
def write_intro (self):
|
||||
def write_intro(self):
|
||||
"""Log introduction text."""
|
||||
self.writeln(configuration.AppInfo)
|
||||
self.writeln(configuration.Freeware)
|
||||
|
|
@ -104,7 +104,7 @@ class TextLogger (_Logger):
|
|||
self.writeln(_("Start checking at %s") %
|
||||
strformat.strtime(self.starttime))
|
||||
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""Write url checking info."""
|
||||
self.writeln()
|
||||
if self.has_part('url'):
|
||||
|
|
@ -133,24 +133,24 @@ class TextLogger (_Logger):
|
|||
self.write_result(url_data)
|
||||
self.flush()
|
||||
|
||||
def write_id (self):
|
||||
def write_id(self):
|
||||
"""Write unique ID of url_data."""
|
||||
self.writeln()
|
||||
self.write(self.part('id') + self.spaces('id'))
|
||||
self.writeln("%d" % self.stats.number, color=self.colorinfo)
|
||||
|
||||
def write_url (self, url_data):
|
||||
def write_url(self, url_data):
|
||||
"""Write url_data.base_url."""
|
||||
self.write(self.part('url') + self.spaces('url'))
|
||||
txt = strformat.strline(url_data.base_url)
|
||||
self.writeln(txt, color=self.colorurl)
|
||||
|
||||
def write_name (self, url_data):
|
||||
def write_name(self, url_data):
|
||||
"""Write url_data.name."""
|
||||
self.write(self.part("name") + self.spaces("name"))
|
||||
self.writeln(strformat.strline(url_data.name), color=self.colorname)
|
||||
|
||||
def write_parent (self, url_data):
|
||||
def write_parent(self, url_data):
|
||||
"""Write url_data.parent_url."""
|
||||
self.write(self.part('parenturl') + self.spaces("parenturl"))
|
||||
txt = url_data.parent_url
|
||||
|
|
@ -162,35 +162,35 @@ class TextLogger (_Logger):
|
|||
txt += _(", page %d") % url_data.page
|
||||
self.writeln(txt, color=self.colorparent)
|
||||
|
||||
def write_base (self, url_data):
|
||||
def write_base(self, url_data):
|
||||
"""Write url_data.base_ref."""
|
||||
self.write(self.part("base") + self.spaces("base"))
|
||||
self.writeln(url_data.base_ref, color=self.colorbase)
|
||||
|
||||
def write_real (self, url_data):
|
||||
def write_real(self, url_data):
|
||||
"""Write url_data.url."""
|
||||
self.write(self.part("realurl") + self.spaces("realurl"))
|
||||
self.writeln(str_text(url_data.url), color=self.colorreal)
|
||||
|
||||
def write_dltime (self, url_data):
|
||||
def write_dltime(self, url_data):
|
||||
"""Write url_data.dltime."""
|
||||
self.write(self.part("dltime") + self.spaces("dltime"))
|
||||
self.writeln(_("%.3f seconds") % url_data.dltime,
|
||||
color=self.colordltime)
|
||||
|
||||
def write_size (self, url_data):
|
||||
def write_size(self, url_data):
|
||||
"""Write url_data.size."""
|
||||
self.write(self.part("dlsize") + self.spaces("dlsize"))
|
||||
self.writeln(strformat.strsize(url_data.size),
|
||||
color=self.colordlsize)
|
||||
|
||||
def write_checktime (self, url_data):
|
||||
def write_checktime(self, url_data):
|
||||
"""Write url_data.checktime."""
|
||||
self.write(self.part("checktime") + self.spaces("checktime"))
|
||||
self.writeln(_("%.3f seconds") % url_data.checktime,
|
||||
color=self.colordltime)
|
||||
|
||||
def write_info (self, url_data):
|
||||
def write_info(self, url_data):
|
||||
"""Write url_data.info."""
|
||||
self.write(self.part("info") + self.spaces("info"))
|
||||
self.writeln(self.wrap(url_data.info, 65), color=self.colorinfo)
|
||||
|
|
@ -200,13 +200,13 @@ class TextLogger (_Logger):
|
|||
self.write(self.part("modified") + self.spaces("modified"))
|
||||
self.writeln(self.format_modified(url_data.modified))
|
||||
|
||||
def write_warning (self, url_data):
|
||||
def write_warning(self, url_data):
|
||||
"""Write url_data.warning."""
|
||||
self.write(self.part("warning") + self.spaces("warning"))
|
||||
warning_msgs = ["[%s] %s" % x for x in url_data.warnings]
|
||||
self.writeln(self.wrap(warning_msgs, 65), color=self.colorwarning)
|
||||
|
||||
def write_result (self, url_data):
|
||||
def write_result(self, url_data):
|
||||
"""Write url_data.result."""
|
||||
self.write(self.part("result") + self.spaces("result"))
|
||||
if url_data.valid:
|
||||
|
|
@ -219,7 +219,7 @@ class TextLogger (_Logger):
|
|||
self.write(": " + url_data.result, color=color)
|
||||
self.writeln()
|
||||
|
||||
def write_outro (self, interrupt=False):
|
||||
def write_outro(self, interrupt=False):
|
||||
"""Write end of checking message."""
|
||||
self.writeln()
|
||||
if interrupt:
|
||||
|
|
@ -264,7 +264,7 @@ class TextLogger (_Logger):
|
|||
{"time": strformat.strtime(self.stoptime),
|
||||
"duration": strformat.strduration_long(duration)})
|
||||
|
||||
def write_stats (self):
|
||||
def write_stats(self):
|
||||
"""Write check statistic info."""
|
||||
self.writeln()
|
||||
self.writeln(_("Statistics:"))
|
||||
|
|
@ -282,7 +282,7 @@ class TextLogger (_Logger):
|
|||
else:
|
||||
self.writeln(_("No statistics available since no URLs were checked."))
|
||||
|
||||
def end_output (self, **kwargs):
|
||||
def end_output(self, **kwargs):
|
||||
"""Write end of output info, and flush all output buffers."""
|
||||
self.stats.downloaded_bytes = kwargs.get("downloaded_bytes")
|
||||
self.stats.num_urls = kwargs.get("num_urls")
|
||||
|
|
|
|||
|
|
@ -29,24 +29,24 @@ xmlattr_entities = {
|
|||
}
|
||||
|
||||
|
||||
def xmlquote (s):
|
||||
def xmlquote(s):
|
||||
"""
|
||||
Quote characters for XML.
|
||||
"""
|
||||
return xml.sax.saxutils.escape(s)
|
||||
|
||||
|
||||
def xmlquoteattr (s):
|
||||
def xmlquoteattr(s):
|
||||
"""
|
||||
Quote XML attribute, ready for inclusion with double quotes.
|
||||
"""
|
||||
return xml.sax.saxutils.escape(s, xmlattr_entities)
|
||||
|
||||
|
||||
class _XMLLogger (_Logger):
|
||||
class _XMLLogger(_Logger):
|
||||
"""Base class for XML output; easy to parse with any XML tool."""
|
||||
|
||||
def __init__ (self, **kwargs):
|
||||
def __init__(self, **kwargs):
|
||||
""" Initialize graph node list and internal id counter. """
|
||||
args = self.get_args(kwargs)
|
||||
super(_XMLLogger, self).__init__(**args)
|
||||
|
|
@ -54,7 +54,7 @@ class _XMLLogger (_Logger):
|
|||
self.indent = " "
|
||||
self.level = 0
|
||||
|
||||
def comment (self, s, **args):
|
||||
def comment(self, s, **args):
|
||||
"""
|
||||
Write XML comment.
|
||||
"""
|
||||
|
|
@ -62,7 +62,7 @@ class _XMLLogger (_Logger):
|
|||
self.write(s, **args)
|
||||
self.writeln(" -->")
|
||||
|
||||
def xml_start_output (self):
|
||||
def xml_start_output(self):
|
||||
"""
|
||||
Write start of checking info as xml comment.
|
||||
"""
|
||||
|
|
@ -72,14 +72,14 @@ class _XMLLogger (_Logger):
|
|||
self.write_intro()
|
||||
self.writeln()
|
||||
|
||||
def xml_end_output (self):
|
||||
def xml_end_output(self):
|
||||
"""
|
||||
Write end of checking info as xml comment.
|
||||
"""
|
||||
if self.has_part("outro"):
|
||||
self.write_outro()
|
||||
|
||||
def xml_starttag (self, name, attrs=None):
|
||||
def xml_starttag(self, name, attrs=None):
|
||||
"""
|
||||
Write XML start tag.
|
||||
"""
|
||||
|
|
@ -92,7 +92,7 @@ class _XMLLogger (_Logger):
|
|||
self.writeln(">")
|
||||
self.level += 1
|
||||
|
||||
def xml_endtag (self, name):
|
||||
def xml_endtag(self, name):
|
||||
"""
|
||||
Write XML end tag.
|
||||
"""
|
||||
|
|
@ -101,7 +101,7 @@ class _XMLLogger (_Logger):
|
|||
self.write(self.indent*self.level)
|
||||
self.writeln("</%s>" % xmlquote(name))
|
||||
|
||||
def xml_tag (self, name, content, attrs=None):
|
||||
def xml_tag(self, name, content, attrs=None):
|
||||
"""
|
||||
Write XML tag with content.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ _proc_status = '/proc/%d/status' % os.getpid()
|
|||
_scale = {'kB': 1024.0, 'mB': 1024.0*1024.0,
|
||||
'KB': 1024.0, 'MB': 1024.0*1024.0}
|
||||
|
||||
def _VmB (VmKey):
|
||||
def _VmB(VmKey):
|
||||
"""Parse /proc/<pid>/status file for given key.
|
||||
|
||||
@return: requested number value of status entry
|
||||
|
|
@ -62,7 +62,7 @@ def _VmB (VmKey):
|
|||
return float(v[1]) * _scale[v[2]]
|
||||
|
||||
|
||||
def memory (since=0.0):
|
||||
def memory(since=0.0):
|
||||
"""Get memory usage.
|
||||
|
||||
@return: memory usage in bytes
|
||||
|
|
@ -71,7 +71,7 @@ def memory (since=0.0):
|
|||
return _VmB('VmSize:') - since
|
||||
|
||||
|
||||
def resident (since=0.0):
|
||||
def resident(since=0.0):
|
||||
"""Get resident memory usage.
|
||||
|
||||
@return: resident memory usage in bytes
|
||||
|
|
@ -80,7 +80,7 @@ def resident (since=0.0):
|
|||
return _VmB('VmRSS:') - since
|
||||
|
||||
|
||||
def stacksize (since=0.0):
|
||||
def stacksize(since=0.0):
|
||||
"""Get stack size.
|
||||
|
||||
@return: stack size in bytes
|
||||
|
|
|
|||
|
|
@ -59,7 +59,7 @@ PARSE_CONTENTS = {
|
|||
"application/xml+sitemap": re.compile(r'<\?xml[^<]+<urlset\s+', re.IGNORECASE),
|
||||
}
|
||||
|
||||
def guess_mimetype (filename, read=None):
|
||||
def guess_mimetype(filename, read=None):
|
||||
"""Return MIME type of file, or 'application/octet-stream' if it could
|
||||
not be determined."""
|
||||
mime, encoding = None, None
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ import re
|
|||
import socket
|
||||
from .. import log, LOG_CHECK
|
||||
|
||||
def is_valid_ip (ip):
|
||||
def is_valid_ip(ip):
|
||||
"""
|
||||
Return True if given ip is a valid IPv4 or IPv6 address.
|
||||
"""
|
||||
|
|
@ -33,7 +33,7 @@ def is_valid_ip (ip):
|
|||
return True
|
||||
|
||||
|
||||
def resolve_host (host):
|
||||
def resolve_host(host):
|
||||
"""
|
||||
@host: hostname or IP address
|
||||
Return list of ip numbers for given host.
|
||||
|
|
|
|||
|
|
@ -41,35 +41,35 @@ def parse_url(url_data):
|
|||
url_data.aggregate.plugin_manager.run_parser_plugins(url_data, pagetype=key)
|
||||
|
||||
|
||||
def parse_html (url_data):
|
||||
def parse_html(url_data):
|
||||
"""Parse into HTML content and search for URLs to check.
|
||||
Found URLs are added to the URL queue.
|
||||
"""
|
||||
linkparse.find_links(url_data.get_soup(), url_data.add_url, linkparse.LinkTags)
|
||||
|
||||
|
||||
def parse_opera (url_data):
|
||||
def parse_opera(url_data):
|
||||
"""Parse an opera bookmark file."""
|
||||
from ..bookmarks.opera import parse_bookmark_data
|
||||
for url, name, lineno in parse_bookmark_data(url_data.get_content()):
|
||||
url_data.add_url(url, line=lineno, name=name)
|
||||
|
||||
|
||||
def parse_chromium (url_data):
|
||||
def parse_chromium(url_data):
|
||||
"""Parse a Chromium or Google Chrome bookmark file."""
|
||||
from ..bookmarks.chromium import parse_bookmark_data
|
||||
for url, name in parse_bookmark_data(url_data.get_content()):
|
||||
url_data.add_url(url, name=name)
|
||||
|
||||
|
||||
def parse_safari (url_data):
|
||||
def parse_safari(url_data):
|
||||
"""Parse a Safari bookmark file."""
|
||||
from ..bookmarks.safari import parse_bookmark_data
|
||||
for url, name in parse_bookmark_data(url_data.get_raw_content()):
|
||||
url_data.add_url(url, name=name)
|
||||
|
||||
|
||||
def parse_text (url_data):
|
||||
def parse_text(url_data):
|
||||
"""Parse a text file with one url per line; comment and blank
|
||||
lines are ignored."""
|
||||
lineno = 0
|
||||
|
|
@ -81,7 +81,7 @@ def parse_text (url_data):
|
|||
url_data.add_url(line, line=lineno)
|
||||
|
||||
|
||||
def parse_css (url_data):
|
||||
def parse_css(url_data):
|
||||
"""
|
||||
Parse a CSS file for url() patterns.
|
||||
"""
|
||||
|
|
@ -96,7 +96,7 @@ def parse_css (url_data):
|
|||
url_data.add_url(url, line=lineno, column=column)
|
||||
|
||||
|
||||
def parse_swf (url_data):
|
||||
def parse_swf(url_data):
|
||||
"""Parse a SWF file for URLs."""
|
||||
linkfinder = linkparse.swf_url_re.finditer
|
||||
for mo in linkfinder(url_data.get_raw_content()):
|
||||
|
|
@ -107,14 +107,14 @@ def parse_swf (url_data):
|
|||
url_data.add_url(url)
|
||||
|
||||
|
||||
def parse_wml (url_data):
|
||||
def parse_wml(url_data):
|
||||
"""Parse into WML content and search for URLs to check.
|
||||
Found URLs are added to the URL queue.
|
||||
"""
|
||||
linkparse.find_links(url_data.get_soup(), url_data.add_url, linkparse.WmlTags)
|
||||
|
||||
|
||||
def parse_firefox (url_data):
|
||||
def parse_firefox(url_data):
|
||||
"""Parse a Firefox3 bookmark file."""
|
||||
filename = url_data.get_os_filename()
|
||||
for url, name in firefox.parse_bookmark_file(filename):
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ class AnchorCheck(_ContentPlugin):
|
|||
linkparse.AnchorTags)
|
||||
self.check_anchor(url_data)
|
||||
|
||||
def add_anchor (self, url, line, column, name, base):
|
||||
def add_anchor(self, url, line, column, name, base):
|
||||
"""Add anchor URL."""
|
||||
self.anchors.append((url, line, column, name, base))
|
||||
|
||||
|
|
|
|||
|
|
@ -50,7 +50,7 @@ class LocationInfo(_ConnectionPlugin):
|
|||
# no risks should be taken here by using a lock.
|
||||
_lock = get_lock("geoip")
|
||||
|
||||
def get_geoip_dat ():
|
||||
def get_geoip_dat():
|
||||
"""Find a GeoIP database, preferring city over country lookup."""
|
||||
datafiles = ("GeoIPCity.dat", "GeoIP.dat")
|
||||
if os.name == 'nt':
|
||||
|
|
@ -85,7 +85,7 @@ if geoip_dat:
|
|||
|
||||
|
||||
@synchronized(_lock)
|
||||
def get_location (host):
|
||||
def get_location(host):
|
||||
"""Get translated country and optional city name.
|
||||
|
||||
@return: country with optional city or an boolean False if not found
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ from .. import fileutil, log, LOG_PLUGIN
|
|||
|
||||
|
||||
_initialized = False
|
||||
def init_win32com ():
|
||||
def init_win32com():
|
||||
"""Initialize the win32com.client cache."""
|
||||
global _initialized
|
||||
if _initialized:
|
||||
|
|
@ -47,7 +47,7 @@ def init_win32com ():
|
|||
_initialized = True
|
||||
|
||||
|
||||
def has_word ():
|
||||
def has_word():
|
||||
"""Determine if Word is available on the current system."""
|
||||
if not has_win32com:
|
||||
return False
|
||||
|
|
@ -64,13 +64,13 @@ def has_word ():
|
|||
return False
|
||||
|
||||
|
||||
def constants (name):
|
||||
def constants(name):
|
||||
"""Helper to return constants. Avoids importing win32com.client in
|
||||
other modules."""
|
||||
return getattr(win32com.client.constants, name)
|
||||
|
||||
|
||||
def get_word_app ():
|
||||
def get_word_app():
|
||||
"""Return open Word.Application handle, or None if Word is not available
|
||||
on this system."""
|
||||
if not has_word():
|
||||
|
|
@ -84,18 +84,18 @@ def get_word_app ():
|
|||
return app
|
||||
|
||||
|
||||
def close_word_app (app):
|
||||
def close_word_app(app):
|
||||
"""Close Word application object."""
|
||||
app.Quit()
|
||||
|
||||
|
||||
def open_wordfile (app, filename):
|
||||
def open_wordfile(app, filename):
|
||||
"""Open given Word file with application object."""
|
||||
return app.Documents.Open(filename, ReadOnly=True,
|
||||
AddToRecentFiles=False, Visible=False, NoEncodingDialog=True)
|
||||
|
||||
|
||||
def close_wordfile (doc):
|
||||
def close_wordfile(doc):
|
||||
"""Close word file."""
|
||||
doc.Close()
|
||||
|
||||
|
|
@ -155,7 +155,7 @@ def get_line_number(doc, wrange):
|
|||
return lineno
|
||||
|
||||
|
||||
def get_temp_filename (content):
|
||||
def get_temp_filename(content):
|
||||
"""Get temporary filename for content to parse."""
|
||||
# store content in temporary file
|
||||
fd, filename = fileutil.get_temp_file(mode='wb', suffix='.doc',
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ class W3Timer:
|
|||
self.last_w3_call = 0
|
||||
|
||||
@synchronized(_w3_time_lock)
|
||||
def check_w3_time (self):
|
||||
def check_w3_time(self):
|
||||
"""Make sure the W3C validators are at most called once a second."""
|
||||
if time.time() - self.last_w3_call < W3Timer.SleepSeconds:
|
||||
time.sleep(W3Timer.SleepSeconds)
|
||||
|
|
@ -111,7 +111,7 @@ class CssSyntaxCheck(_ContentPlugin):
|
|||
log.warn(LOG_PLUGIN, _("CSS syntax check plugin error: %(msg)s ") % {"msg": msg})
|
||||
|
||||
|
||||
def check_w3_errors (url_data, xml, w3type):
|
||||
def check_w3_errors(url_data, xml, w3type):
|
||||
"""Add warnings for W3C HTML or CSS errors in xml format.
|
||||
w3type is either "W3C HTML" or "W3C CSS"."""
|
||||
dom = parseString(xml)
|
||||
|
|
@ -126,7 +126,7 @@ def check_w3_errors (url_data, xml, w3type):
|
|||
url_data.add_warning(warnmsg % attrs)
|
||||
|
||||
|
||||
def getXmlText (parent, tag):
|
||||
def getXmlText(parent, tag):
|
||||
"""Return XML content of given tag in parent element."""
|
||||
elem = parent.getElementsByTagName(tag)[0]
|
||||
# Yes, the DOM standard is awful.
|
||||
|
|
|
|||
|
|
@ -65,7 +65,7 @@ class VirusCheck(_ContentPlugin):
|
|||
return config
|
||||
|
||||
|
||||
class ClamavError (Exception):
|
||||
class ClamavError(Exception):
|
||||
"""Raised on clamav errors."""
|
||||
pass
|
||||
|
||||
|
|
@ -73,7 +73,7 @@ class ClamavError (Exception):
|
|||
class ClamdScanner:
|
||||
"""Virus scanner using a clamd daemon process."""
|
||||
|
||||
def __init__ (self, clamav_conf):
|
||||
def __init__(self, clamav_conf):
|
||||
"""Initialize clamd daemon process sockets."""
|
||||
self.infected = []
|
||||
self.errors = []
|
||||
|
|
@ -82,7 +82,7 @@ class ClamdScanner:
|
|||
self.sock.getsockopt(socket.SOL_SOCKET, socket.SO_RCVBUF)
|
||||
self.wsock = self.new_scansock()
|
||||
|
||||
def new_scansock (self):
|
||||
def new_scansock(self):
|
||||
"""Return a connected socket for sending scan data to it."""
|
||||
port = None
|
||||
try:
|
||||
|
|
@ -108,11 +108,11 @@ class ClamdScanner:
|
|||
raise
|
||||
return wsock
|
||||
|
||||
def scan (self, data):
|
||||
def scan(self, data):
|
||||
"""Scan given data for viruses."""
|
||||
self.wsock.sendall(data)
|
||||
|
||||
def close (self):
|
||||
def close(self):
|
||||
"""Get results and close clamd daemon sockets."""
|
||||
self.wsock.close()
|
||||
data = self.sock.recv(self.sock_rcvbuf)
|
||||
|
|
@ -125,7 +125,7 @@ class ClamdScanner:
|
|||
self.sock.close()
|
||||
|
||||
|
||||
def canonical_clamav_conf ():
|
||||
def canonical_clamav_conf():
|
||||
"""Default clamav configs for various platforms."""
|
||||
if os.name == 'posix':
|
||||
clamavconf = "/etc/clamav/clamd.conf"
|
||||
|
|
@ -143,16 +143,16 @@ def get_clamav_conf(filename):
|
|||
log.warn(LOG_PLUGIN, "No ClamAV config file found at %r.", filename)
|
||||
|
||||
|
||||
def get_sockinfo (host, port=None):
|
||||
def get_sockinfo(host, port=None):
|
||||
"""Return socket.getaddrinfo for given host and port."""
|
||||
family, socktype = socket.AF_INET, socket.SOCK_STREAM
|
||||
return socket.getaddrinfo(host, port, family, socktype)
|
||||
|
||||
|
||||
class ClamavConfig (dict):
|
||||
class ClamavConfig(dict):
|
||||
"""Clamav configuration wrapper, with clamd connection method."""
|
||||
|
||||
def __init__ (self, filename):
|
||||
def __init__(self, filename):
|
||||
"""Parse clamav configuration file."""
|
||||
super(ClamavConfig, self).__init__()
|
||||
self.parseconf(filename)
|
||||
|
|
@ -161,7 +161,7 @@ class ClamavConfig (dict):
|
|||
if self.get('TCPSocket') and self.get('LocalSocket'):
|
||||
raise ClamavError(_("only one of TCPSocket and LocalSocket must be enabled"))
|
||||
|
||||
def parseconf (self, filename):
|
||||
def parseconf(self, filename):
|
||||
"""Parse clamav configuration from given file."""
|
||||
with open(filename) as fd:
|
||||
# yet another config format, sigh
|
||||
|
|
@ -176,7 +176,7 @@ class ClamavConfig (dict):
|
|||
else:
|
||||
self[split[0]] = split[1]
|
||||
|
||||
def new_connection (self):
|
||||
def new_connection(self):
|
||||
"""Connect to clamd for stream scanning.
|
||||
|
||||
@return: tuple (connected socket, host)
|
||||
|
|
@ -191,7 +191,7 @@ class ClamavConfig (dict):
|
|||
raise ClamavError(_("one of TCPSocket or LocalSocket must be enabled"))
|
||||
return sock, host
|
||||
|
||||
def create_local_socket (self):
|
||||
def create_local_socket(self):
|
||||
"""Create local socket, connect to it and return socket object."""
|
||||
sock = create_socket(socket.AF_UNIX, socket.SOCK_STREAM)
|
||||
addr = self['LocalSocket']
|
||||
|
|
@ -202,7 +202,7 @@ class ClamavConfig (dict):
|
|||
raise
|
||||
return sock
|
||||
|
||||
def create_tcp_socket (self, host):
|
||||
def create_tcp_socket(self, host):
|
||||
"""Create tcp socket, connect to it and return socket object."""
|
||||
port = int(self['TCPSocket'])
|
||||
sockinfo = get_sockinfo(host, port=port)
|
||||
|
|
@ -215,7 +215,7 @@ class ClamavConfig (dict):
|
|||
return sock
|
||||
|
||||
|
||||
def scan (data, clamconf):
|
||||
def scan(data, clamconf):
|
||||
"""Scan data for viruses.
|
||||
@return (infection msgs, errors)
|
||||
@rtype ([], [])
|
||||
|
|
|
|||
|
|
@ -35,7 +35,7 @@ class RobotFileParser:
|
|||
"""This class provides a set of methods to read, parse and answer
|
||||
questions about a single robots.txt file."""
|
||||
|
||||
def __init__ (self, url='', session=None, proxies=None, auth=None):
|
||||
def __init__(self, url='', session=None, proxies=None, auth=None):
|
||||
"""Initialize internal entry lists and store given url and
|
||||
credentials."""
|
||||
self.set_url(url)
|
||||
|
|
@ -47,7 +47,7 @@ class RobotFileParser:
|
|||
self.auth = auth
|
||||
self._reset()
|
||||
|
||||
def _reset (self):
|
||||
def _reset(self):
|
||||
"""Reset internal flags and entry lists."""
|
||||
self.entries = []
|
||||
self.default_entry = None
|
||||
|
|
@ -58,7 +58,7 @@ class RobotFileParser:
|
|||
self.sitemap_urls = []
|
||||
self.encoding = None
|
||||
|
||||
def mtime (self):
|
||||
def mtime(self):
|
||||
"""Returns the time the robots.txt file was last fetched.
|
||||
|
||||
This is useful for long-running web spiders that need to
|
||||
|
|
@ -69,17 +69,17 @@ class RobotFileParser:
|
|||
"""
|
||||
return self.last_checked
|
||||
|
||||
def modified (self):
|
||||
def modified(self):
|
||||
"""Set the time the robots.txt file was last fetched to the
|
||||
current time."""
|
||||
self.last_checked = time.time()
|
||||
|
||||
def set_url (self, url):
|
||||
def set_url(self, url):
|
||||
"""Set the URL referring to a robots.txt file."""
|
||||
self.url = url
|
||||
self.host, self.path = urllib.parse.urlparse(url)[1:3]
|
||||
|
||||
def read (self):
|
||||
def read(self):
|
||||
"""Read the robots.txt URL and feeds it to the parser."""
|
||||
self._reset()
|
||||
kwargs = dict(
|
||||
|
|
@ -116,7 +116,7 @@ class RobotFileParser:
|
|||
self.allow_all = True
|
||||
log.debug(LOG_CHECK, "%r allow all (request error)", self.url)
|
||||
|
||||
def _add_entry (self, entry):
|
||||
def _add_entry(self, entry):
|
||||
"""Add a parsed entry to entry list.
|
||||
|
||||
@return: None
|
||||
|
|
@ -127,7 +127,7 @@ class RobotFileParser:
|
|||
else:
|
||||
self.entries.append(entry)
|
||||
|
||||
def parse (self, lines):
|
||||
def parse(self, lines):
|
||||
"""Parse the input lines from a robot.txt file.
|
||||
We allow that a user-agent: line is not preceded by
|
||||
one or more blank lines.
|
||||
|
|
@ -210,7 +210,7 @@ class RobotFileParser:
|
|||
self.modified()
|
||||
log.debug(LOG_CHECK, "Parsed rules:\n%s", str(self))
|
||||
|
||||
def can_fetch (self, useragent, url):
|
||||
def can_fetch(self, useragent, url):
|
||||
"""Using the parsed robots.txt decide if useragent can fetch url.
|
||||
|
||||
@return: True if agent can fetch url, else False
|
||||
|
|
@ -240,7 +240,7 @@ class RobotFileParser:
|
|||
log.debug(LOG_CHECK, " ... agent not found, allow.")
|
||||
return True
|
||||
|
||||
def get_crawldelay (self, useragent):
|
||||
def get_crawldelay(self, useragent):
|
||||
"""Look for a configured crawl delay.
|
||||
|
||||
@return: crawl delay in seconds or zero
|
||||
|
|
@ -251,7 +251,7 @@ class RobotFileParser:
|
|||
return entry.crawldelay
|
||||
return 0
|
||||
|
||||
def __str__ (self):
|
||||
def __str__(self):
|
||||
"""Constructs string representation, usable as contents of a
|
||||
robots.txt file.
|
||||
|
||||
|
|
@ -269,7 +269,7 @@ class RuleLine:
|
|||
(allowance==0) followed by a path.
|
||||
"""
|
||||
|
||||
def __init__ (self, path, allowance):
|
||||
def __init__(self, path, allowance):
|
||||
"""Initialize with given path and allowance info."""
|
||||
if path == '' and not allowance:
|
||||
# an empty value means allow all
|
||||
|
|
@ -278,7 +278,7 @@ class RuleLine:
|
|||
self.path = urllib.parse.quote(path)
|
||||
self.allowance = allowance
|
||||
|
||||
def applies_to (self, path):
|
||||
def applies_to(self, path):
|
||||
"""Look if given path applies to this rule.
|
||||
|
||||
@return: True if pathname applies to this rule, else False
|
||||
|
|
@ -286,7 +286,7 @@ class RuleLine:
|
|||
"""
|
||||
return self.path == "*" or path.startswith(self.path)
|
||||
|
||||
def __str__ (self):
|
||||
def __str__(self):
|
||||
"""Construct string representation in robots.txt format.
|
||||
|
||||
@return: robots.txt format
|
||||
|
|
@ -298,13 +298,13 @@ class RuleLine:
|
|||
class Entry:
|
||||
"""An entry has one or more user-agents and zero or more rulelines."""
|
||||
|
||||
def __init__ (self):
|
||||
def __init__(self):
|
||||
"""Initialize user agent and rule list."""
|
||||
self.useragents = []
|
||||
self.rulelines = []
|
||||
self.crawldelay = 0
|
||||
|
||||
def __str__ (self):
|
||||
def __str__(self):
|
||||
"""string representation in robots.txt format.
|
||||
|
||||
@return: robots.txt format
|
||||
|
|
@ -316,7 +316,7 @@ class Entry:
|
|||
lines.extend([str(line) for line in self.rulelines])
|
||||
return "\n".join(lines)
|
||||
|
||||
def applies_to (self, useragent):
|
||||
def applies_to(self, useragent):
|
||||
"""Check if this entry applies to the specified agent.
|
||||
|
||||
@return: True if this entry applies to the agent, else False.
|
||||
|
|
@ -333,7 +333,7 @@ class Entry:
|
|||
return True
|
||||
return False
|
||||
|
||||
def allowance (self, filename):
|
||||
def allowance(self, filename):
|
||||
"""Preconditions:
|
||||
- our agent applies to this entry
|
||||
- filename is URL decoded
|
||||
|
|
|
|||
|
|
@ -34,7 +34,7 @@ if socket.has_ipv6:
|
|||
raise
|
||||
|
||||
|
||||
def create_socket (family, socktype, proto=0, timeout=60):
|
||||
def create_socket(family, socktype, proto=0, timeout=60):
|
||||
"""
|
||||
Create a socket with given family and type. If SSL context
|
||||
is given an SSL socket is created.
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ from . import i18n
|
|||
from builtins import str as str_text
|
||||
|
||||
|
||||
def unicode_safe (s, encoding=i18n.default_encoding, errors='replace'):
|
||||
def unicode_safe(s, encoding=i18n.default_encoding, errors='replace'):
|
||||
"""Get unicode string without raising encoding errors. Unknown
|
||||
characters of the given encoding will be ignored.
|
||||
|
||||
|
|
@ -59,7 +59,7 @@ def unicode_safe (s, encoding=i18n.default_encoding, errors='replace'):
|
|||
return str(s)
|
||||
|
||||
|
||||
def ascii_safe (s):
|
||||
def ascii_safe(s):
|
||||
"""Get ASCII string without raising encoding errors. Unknown
|
||||
characters of the given encoding will be ignored.
|
||||
|
||||
|
|
@ -73,7 +73,7 @@ def ascii_safe (s):
|
|||
return s
|
||||
|
||||
|
||||
def is_ascii (s):
|
||||
def is_ascii(s):
|
||||
"""Test if a string can be encoded in ASCII."""
|
||||
try:
|
||||
s.encode('ascii', 'strict')
|
||||
|
|
@ -82,7 +82,7 @@ def is_ascii (s):
|
|||
return False
|
||||
|
||||
|
||||
def is_encoding (text):
|
||||
def is_encoding(text):
|
||||
"""Check if string is a valid encoding."""
|
||||
try:
|
||||
return codecs.lookup(text)
|
||||
|
|
@ -90,12 +90,12 @@ def is_encoding (text):
|
|||
return False
|
||||
|
||||
|
||||
def url_unicode_split (url):
|
||||
def url_unicode_split(url):
|
||||
"""Like urllib.parse.urlsplit(), but always returning unicode parts."""
|
||||
return [unicode_safe(s) for s in urllib.parse.urlsplit(url)]
|
||||
|
||||
|
||||
def unquote (s, matching=False):
|
||||
def unquote(s, matching=False):
|
||||
"""Remove leading and ending single and double quotes.
|
||||
The quotes need to match if matching is True. Only one quote from each
|
||||
end will be stripped.
|
||||
|
|
@ -124,7 +124,7 @@ _para_posix = r"(?:%(sep)s)(?:(?:%(sep)s)\s*)+" % {'sep': '\n'}
|
|||
_para_win = r"(?:%(sep)s)(?:(?:%(sep)s)\s*)+" % {'sep': '\r\n'}
|
||||
_para_ro = re.compile("%s|%s|%s" % (_para_mac, _para_posix, _para_win))
|
||||
|
||||
def get_paragraphs (text):
|
||||
def get_paragraphs(text):
|
||||
"""A new paragraph is considered to start at a line which follows
|
||||
one or more blank lines (lines containing nothing or just spaces).
|
||||
The first line of the text also starts a paragraph."""
|
||||
|
|
@ -133,7 +133,7 @@ def get_paragraphs (text):
|
|||
return _para_ro.split(text)
|
||||
|
||||
|
||||
def wrap (text, width, **kwargs):
|
||||
def wrap(text, width, **kwargs):
|
||||
"""Adjust lines of text to be not longer than width. The text will be
|
||||
returned unmodified if width <= 0.
|
||||
See textwrap.wrap() for a list of supported kwargs.
|
||||
|
|
@ -147,13 +147,13 @@ def wrap (text, width, **kwargs):
|
|||
return os.linesep.join(ret)
|
||||
|
||||
|
||||
def indent (text, indent_string=" "):
|
||||
def indent(text, indent_string=" "):
|
||||
"""Indent each line of text with the given indent string."""
|
||||
return os.linesep.join("%s%s" % (indent_string, x)
|
||||
for x in text.splitlines())
|
||||
|
||||
|
||||
def get_line_number (s, index):
|
||||
def get_line_number(s, index):
|
||||
r"""Return the line number of s[index] or zero on errors.
|
||||
Lines are assumed to be separated by the ASCII character '\n'."""
|
||||
i = 0
|
||||
|
|
@ -167,14 +167,14 @@ def get_line_number (s, index):
|
|||
return line
|
||||
|
||||
|
||||
def paginate (text):
|
||||
def paginate(text):
|
||||
"""Print text in pages of lines."""
|
||||
pydoc.pager(text)
|
||||
|
||||
|
||||
_markup_re = re.compile("<.*?>", re.DOTALL)
|
||||
|
||||
def remove_markup (s):
|
||||
def remove_markup(s):
|
||||
"""Remove all <*> html markup tags from s."""
|
||||
mo = _markup_re.search(s)
|
||||
while mo:
|
||||
|
|
@ -183,7 +183,7 @@ def remove_markup (s):
|
|||
return s
|
||||
|
||||
|
||||
def strsize (b, grouping=True):
|
||||
def strsize(b, grouping=True):
|
||||
"""Return human representation of bytes b. A negative number of bytes
|
||||
raises a value error."""
|
||||
if b < 0:
|
||||
|
|
@ -203,13 +203,13 @@ def strsize (b, grouping=True):
|
|||
return "%sGB" % locale.format_string("%.1f", (float(b) / (1024*1024*1024)), grouping)
|
||||
|
||||
|
||||
def strtime (t, func=time.localtime):
|
||||
def strtime(t, func=time.localtime):
|
||||
"""Return ISO 8601 formatted time."""
|
||||
return time.strftime("%Y-%m-%d %H:%M:%S", func(t)) + strtimezone()
|
||||
|
||||
|
||||
# from quodlibet
|
||||
def strduration (duration):
|
||||
def strduration(duration):
|
||||
"""Turn a time value in seconds into hh:mm:ss or mm:ss."""
|
||||
if duration < 0:
|
||||
duration = abs(duration)
|
||||
|
|
@ -229,7 +229,7 @@ def strduration (duration):
|
|||
|
||||
|
||||
# from quodlibet
|
||||
def strduration_long (duration, do_translate=True):
|
||||
def strduration_long(duration, do_translate=True):
|
||||
"""Turn a time value in seconds into x hours, x minutes, etc."""
|
||||
if do_translate:
|
||||
# use global translator functions
|
||||
|
|
@ -275,7 +275,7 @@ def strduration_long (duration, do_translate=True):
|
|||
return "%s%s" % (prefix, ", ".join(time_str))
|
||||
|
||||
|
||||
def strtimezone ():
|
||||
def strtimezone():
|
||||
"""Return timezone info, %z on some platforms, but not supported on all.
|
||||
"""
|
||||
if time.daylight:
|
||||
|
|
@ -293,7 +293,7 @@ def stripurl(s):
|
|||
return s.splitlines()[0].strip()
|
||||
|
||||
|
||||
def limit (s, length=72):
|
||||
def limit(s, length=72):
|
||||
"""If the length of the string exceeds the given limit, it will be cut
|
||||
off and three dots will be appended.
|
||||
|
||||
|
|
@ -311,12 +311,12 @@ def limit (s, length=72):
|
|||
return "%s..." % s[:length]
|
||||
|
||||
|
||||
def strline (s):
|
||||
def strline(s):
|
||||
"""Display string representation on one line."""
|
||||
return strip_control_chars("`%s'" % s.replace("\n", "\\n"))
|
||||
|
||||
|
||||
def format_feature_warning (**kwargs):
|
||||
def format_feature_warning(**kwargs):
|
||||
"""Format warning that a module could not be imported and that it should
|
||||
be installed for a certain URL.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -19,19 +19,19 @@ Support for managing threads.
|
|||
import threading
|
||||
|
||||
|
||||
class StoppableThread (threading.Thread):
|
||||
class StoppableThread(threading.Thread):
|
||||
"""Thread class with a stop() method. The thread itself has to check
|
||||
regularly for the stopped() condition."""
|
||||
|
||||
def __init__ (self):
|
||||
def __init__(self):
|
||||
"""Store stop event."""
|
||||
super(StoppableThread, self).__init__()
|
||||
self._stopper = threading.Event()
|
||||
|
||||
def stop (self):
|
||||
def stop(self):
|
||||
"""Set stop event."""
|
||||
self._stopper.set()
|
||||
|
||||
def stopped (self, timeout=None):
|
||||
def stopped(self, timeout=None):
|
||||
"""Return True if stop event is set."""
|
||||
return self._stopper.wait(timeout)
|
||||
|
|
|
|||
|
|
@ -24,7 +24,7 @@ _trace_ignore = set()
|
|||
_trace_filter = set()
|
||||
|
||||
|
||||
def trace_ignore (names):
|
||||
def trace_ignore(names):
|
||||
"""Add given names to trace ignore set, or clear set if names is None."""
|
||||
if names is None:
|
||||
_trace_ignore.clear()
|
||||
|
|
@ -32,7 +32,7 @@ def trace_ignore (names):
|
|||
_trace_ignore.update(names)
|
||||
|
||||
|
||||
def trace_filter (patterns):
|
||||
def trace_filter(patterns):
|
||||
"""Add given patterns to trace filter set or clear set if patterns is
|
||||
None."""
|
||||
if patterns is None:
|
||||
|
|
@ -41,7 +41,7 @@ def trace_filter (patterns):
|
|||
_trace_filter.update(re.compile(pat) for pat in patterns)
|
||||
|
||||
|
||||
def _trace (frame, event, arg):
|
||||
def _trace(frame, event, arg):
|
||||
"""Trace function calls."""
|
||||
if event in ('call', 'c_call'):
|
||||
_trace_line(frame, event, arg)
|
||||
|
|
@ -53,7 +53,7 @@ def _trace (frame, event, arg):
|
|||
return _trace
|
||||
|
||||
|
||||
def _trace_full (frame, event, arg):
|
||||
def _trace_full(frame, event, arg):
|
||||
"""Trace every executed line."""
|
||||
if event == "line":
|
||||
_trace_line(frame, event, arg)
|
||||
|
|
@ -62,7 +62,7 @@ def _trace_full (frame, event, arg):
|
|||
return _trace_full
|
||||
|
||||
|
||||
def _trace_line (frame, event, arg):
|
||||
def _trace_line(frame, event, arg):
|
||||
"""Print current executed line."""
|
||||
name = frame.f_globals["__name__"]
|
||||
if name in _trace_ignore:
|
||||
|
|
@ -82,7 +82,7 @@ def _trace_line (frame, event, arg):
|
|||
print("THREAD(%d) %r %.2f %s # %s:%d" % args)
|
||||
|
||||
|
||||
def trace_on (full=False):
|
||||
def trace_on(full=False):
|
||||
"""Start tracing of the current thread (and the current thread only)."""
|
||||
if full:
|
||||
sys.settrace(_trace_full)
|
||||
|
|
@ -90,6 +90,6 @@ def trace_on (full=False):
|
|||
sys.settrace(_trace)
|
||||
|
||||
|
||||
def trace_off ():
|
||||
def trace_off():
|
||||
"""Stop tracing of the current thread (and the current thread only)."""
|
||||
sys.settrace(None)
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ else:
|
|||
URL_TAG = 'Source-Package-URL:'
|
||||
|
||||
|
||||
def check_update ():
|
||||
def check_update():
|
||||
"""Return the following values:
|
||||
(False, errmsg) - online version could not be determined
|
||||
(True, None) - user has newest version
|
||||
|
|
@ -53,7 +53,7 @@ def check_update ():
|
|||
return True, (version, None)
|
||||
|
||||
|
||||
def get_online_version ():
|
||||
def get_online_version():
|
||||
"""Download update info and parse it."""
|
||||
# prevent getting a cached answer
|
||||
headers = {'Pragma': 'no-cache', 'Cache-Control': 'no-cache'}
|
||||
|
|
@ -70,6 +70,6 @@ def get_online_version ():
|
|||
return version, url
|
||||
|
||||
|
||||
def is_newer_version (version):
|
||||
def is_newer_version(version):
|
||||
"""Check if given version is newer than current version."""
|
||||
return StrictVersion(version) > StrictVersion(CurrentVersion)
|
||||
|
|
|
|||
|
|
@ -87,7 +87,7 @@ is_safe_fragment = re.compile("(?i)^%s$" % _safe_fragment_pattern).match
|
|||
|
||||
|
||||
# snatched form urlparse.py
|
||||
def splitparams (path):
|
||||
def splitparams(path):
|
||||
"""Split off parameter part from path.
|
||||
Returns tuple (path-without-param, param)
|
||||
"""
|
||||
|
|
@ -100,7 +100,7 @@ def splitparams (path):
|
|||
return path[:i], path[i+1:]
|
||||
|
||||
|
||||
def is_numeric_port (portstr):
|
||||
def is_numeric_port(portstr):
|
||||
"""return: integer port (== True) iff portstr is a valid port number,
|
||||
False otherwise
|
||||
"""
|
||||
|
|
@ -112,13 +112,13 @@ def is_numeric_port (portstr):
|
|||
return False
|
||||
|
||||
|
||||
def safe_host_pattern (host):
|
||||
def safe_host_pattern(host):
|
||||
"""Return regular expression pattern with given host for URL testing."""
|
||||
return "(?i)%s://%s%s(#%s)?" % \
|
||||
(_safe_scheme_pattern, host, _safe_path_pattern, _safe_fragment_pattern)
|
||||
|
||||
|
||||
def parse_qsl (qs, encoding, keep_blank_values=0, strict_parsing=0):
|
||||
def parse_qsl(qs, encoding, keep_blank_values=0, strict_parsing=0):
|
||||
"""Parse a query given as a string argument.
|
||||
|
||||
@param qs: URL-encoded query string to be parsed
|
||||
|
|
@ -168,7 +168,7 @@ def parse_qsl (qs, encoding, keep_blank_values=0, strict_parsing=0):
|
|||
return r
|
||||
|
||||
|
||||
def idna_encode (host):
|
||||
def idna_encode(host):
|
||||
"""Encode hostname as internationalized domain name (IDN) according
|
||||
to RFC 3490.
|
||||
@raise: UnicodeError if hostname is not properly IDN encoded.
|
||||
|
|
@ -183,7 +183,7 @@ def idna_encode (host):
|
|||
return host, False
|
||||
|
||||
|
||||
def url_fix_host (urlparts, encoding):
|
||||
def url_fix_host(urlparts, encoding):
|
||||
"""Unquote and fix hostname. Returns is_idn."""
|
||||
if not urlparts[1]:
|
||||
urlparts[2] = urllib.parse.unquote(urlparts[2], encoding=encoding)
|
||||
|
|
@ -229,7 +229,7 @@ def url_fix_host (urlparts, encoding):
|
|||
return is_idn
|
||||
|
||||
|
||||
def url_fix_common_typos (url):
|
||||
def url_fix_common_typos(url):
|
||||
"""Fix common typos in given URL like forgotten colon."""
|
||||
if url.startswith("http//"):
|
||||
url = "http://" + url[6:]
|
||||
|
|
@ -238,7 +238,7 @@ def url_fix_common_typos (url):
|
|||
return url
|
||||
|
||||
|
||||
def url_fix_mailto_urlsplit (urlparts):
|
||||
def url_fix_mailto_urlsplit(urlparts):
|
||||
"""Split query part of mailto url if found."""
|
||||
sep = b"?" if isinstance(urlparts[2], bytes) else "?"
|
||||
if sep in urlparts[2]:
|
||||
|
|
@ -252,7 +252,7 @@ wayback_regex = re.compile(r'(https?)(\%3A/|:/)')
|
|||
def url_fix_wayback_query(path):
|
||||
return wayback_regex.sub(r'\1://', path)
|
||||
|
||||
def url_parse_query (query, encoding):
|
||||
def url_parse_query(query, encoding):
|
||||
"""Parse and re-join the given CGI query."""
|
||||
# if ? is in the query, split it off, seen at msdn.microsoft.com
|
||||
append = ""
|
||||
|
|
@ -273,7 +273,7 @@ def url_parse_query (query, encoding):
|
|||
return ''.join(l) + append
|
||||
|
||||
|
||||
def urlunsplit (urlparts):
|
||||
def urlunsplit(urlparts):
|
||||
"""Same as urllib.parse.urlunsplit but with extra UNC path handling
|
||||
for Windows OS."""
|
||||
res = urllib.parse.urlunsplit(urlparts)
|
||||
|
|
@ -286,7 +286,7 @@ def urlunsplit (urlparts):
|
|||
return res
|
||||
|
||||
|
||||
def url_norm (url, encoding):
|
||||
def url_norm(url, encoding):
|
||||
"""Normalize the given URL which must be quoted. Supports unicode
|
||||
hostnames (IDNA encoding) according to RFC 3490.
|
||||
|
||||
|
|
@ -335,7 +335,7 @@ _thisdir_ro = re.compile(r"^\./")
|
|||
_samedir_ro = re.compile(r"/\./|/\.$")
|
||||
_parentdir_ro = re.compile(r"^/(\.\./)+|/(?!\.\./)[^/]+/\.\.(/|$)")
|
||||
_relparentdir_ro = re.compile(r"^(?!\.\./)[^/]+/\.\.(/|$)")
|
||||
def collapse_segments (path):
|
||||
def collapse_segments(path):
|
||||
"""Remove all redundant segments from the given URL path.
|
||||
Precondition: path is an unquoted url path"""
|
||||
# replace backslashes
|
||||
|
|
@ -371,7 +371,7 @@ def collapse_segments (path):
|
|||
url_is_absolute = re.compile(r"^[-\.a-z]+:", re.I).match
|
||||
|
||||
|
||||
def url_quote (url, encoding):
|
||||
def url_quote(url, encoding):
|
||||
"""Quote given URL."""
|
||||
if not url_is_absolute(url):
|
||||
return document_quote(url)
|
||||
|
|
@ -393,7 +393,7 @@ def url_quote (url, encoding):
|
|||
return urlunsplit(urlparts)
|
||||
|
||||
|
||||
def document_quote (document):
|
||||
def document_quote(document):
|
||||
"""Quote given document."""
|
||||
doc, query = urllib.parse.splitquery(document)
|
||||
doc = urllib.parse.quote(doc, safe='/=,')
|
||||
|
|
@ -402,7 +402,7 @@ def document_quote (document):
|
|||
return doc
|
||||
|
||||
|
||||
def match_url (url, domainlist):
|
||||
def match_url(url, domainlist):
|
||||
"""Return True if host part of url matches an entry in given domain list.
|
||||
"""
|
||||
if not url:
|
||||
|
|
@ -410,7 +410,7 @@ def match_url (url, domainlist):
|
|||
return match_host(url_split(url)[1], domainlist)
|
||||
|
||||
|
||||
def match_host (host, domainlist):
|
||||
def match_host(host, domainlist):
|
||||
"""Return True if host matches an entry in given domain list."""
|
||||
if not host:
|
||||
return False
|
||||
|
|
@ -428,7 +428,7 @@ if os.name == 'nt':
|
|||
_nopathquote_chars += "|"
|
||||
_safe_url_chars = re.escape(_nopathquote_chars + "_:.&#%?[]!")+"a-zA-Z0-9"
|
||||
_safe_url_chars_ro = re.compile(r"^[%s]*$" % _safe_url_chars)
|
||||
def url_needs_quoting (url):
|
||||
def url_needs_quoting(url):
|
||||
"""Check if url needs percent quoting. Note that the method does
|
||||
only check basic character sets, and not any other syntax.
|
||||
The URL might still be syntactically incorrect even when
|
||||
|
|
@ -441,7 +441,7 @@ def url_needs_quoting (url):
|
|||
return not _safe_url_chars_ro.match(url)
|
||||
|
||||
|
||||
def url_split (url):
|
||||
def url_split(url):
|
||||
"""Split url in a tuple (scheme, hostname, port, document) where
|
||||
hostname is always lowercased.
|
||||
Precondition: url is syntactically correct URI (eg has no whitespace)
|
||||
|
|
@ -455,14 +455,14 @@ def url_split (url):
|
|||
return scheme, host, port, document
|
||||
|
||||
|
||||
def url_unsplit (parts):
|
||||
def url_unsplit(parts):
|
||||
"""Rejoin URL parts to a string."""
|
||||
if parts[2] == default_ports.get(parts[0]):
|
||||
return "%s://%s%s" % (parts[0], parts[1], parts[3])
|
||||
return "%s://%s:%d%s" % parts
|
||||
|
||||
|
||||
def splitport (host, port=0):
|
||||
def splitport(host, port=0):
|
||||
"""Split optional port number from host. If host has no port number,
|
||||
the given default port is returned.
|
||||
|
||||
|
|
|
|||
|
|
@ -15,7 +15,7 @@
|
|||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
"""Windows utility functions."""
|
||||
|
||||
def get_shell_folder (name):
|
||||
def get_shell_folder(name):
|
||||
"""Get Windows Shell Folder locations from the registry."""
|
||||
try:
|
||||
import _winreg as winreg
|
||||
|
|
|
|||
|
|
@ -196,7 +196,7 @@ file entry:
|
|||
Epilog = "\n".join((Examples, LoggerTypes, RegularExpressions, CookieFormat, ProxySupport, Notes, Retval, Warnings))
|
||||
|
||||
|
||||
def has_encoding (encoding):
|
||||
def has_encoding(encoding):
|
||||
"""Detect if Python can encode in a certain encoding."""
|
||||
try:
|
||||
codecs.lookup(encoding)
|
||||
|
|
@ -376,7 +376,7 @@ if has_argcomplete:
|
|||
argcomplete.autocomplete(argparser)
|
||||
|
||||
|
||||
def read_stdin_urls ():
|
||||
def read_stdin_urls():
|
||||
"""Read list of URLs, separated by white-space, from stdin."""
|
||||
num = 0
|
||||
while True:
|
||||
|
|
|
|||
|
|
@ -23,7 +23,7 @@ import codecs
|
|||
import html
|
||||
from linkcheck import strformat
|
||||
|
||||
def main (filename):
|
||||
def main(filename):
|
||||
om = print_memorydump(filename)
|
||||
dirname, basename = os.path.split(filename)
|
||||
basename = os.path.splitext(basename)[0]
|
||||
|
|
|
|||
50
setup.py
50
setup.py
|
|
@ -60,12 +60,12 @@ def get_long_description():
|
|||
except:
|
||||
return Description
|
||||
|
||||
def normpath (path):
|
||||
def normpath(path):
|
||||
"""Norm a path name to platform specific notation."""
|
||||
return os.path.normpath(path)
|
||||
|
||||
|
||||
def cnormpath (path):
|
||||
def cnormpath(path):
|
||||
"""Norm a path name to platform specific notation and make it absolute."""
|
||||
path = normpath(path)
|
||||
if os.name == 'nt':
|
||||
|
|
@ -77,7 +77,7 @@ def cnormpath (path):
|
|||
|
||||
|
||||
release_ro = re.compile(r"\(released (.+)\)")
|
||||
def get_release_date ():
|
||||
def get_release_date():
|
||||
"""Parse and return relase date as string from doc/changelog.txt."""
|
||||
fname = os.path.join("doc", "changelog.txt")
|
||||
release_date = "unknown"
|
||||
|
|
@ -95,10 +95,10 @@ def get_portable():
|
|||
return os.environ.get('LINKCHECKER_PORTABLE', '0')
|
||||
|
||||
|
||||
class MyInstallLib (install_lib):
|
||||
class MyInstallLib(install_lib):
|
||||
"""Custom library installation."""
|
||||
|
||||
def install (self):
|
||||
def install(self):
|
||||
"""Install the generated config file."""
|
||||
outs = super(MyInstallLib, self).install()
|
||||
infile = self.create_conf_file()
|
||||
|
|
@ -107,7 +107,7 @@ class MyInstallLib (install_lib):
|
|||
outs.append(outfile)
|
||||
return outs
|
||||
|
||||
def create_conf_file (self):
|
||||
def create_conf_file(self):
|
||||
"""Create configuration file."""
|
||||
cmd_obj = self.distribution.get_command_obj("install")
|
||||
cmd_obj.ensure_finalized()
|
||||
|
|
@ -141,11 +141,11 @@ class MyInstallLib (install_lib):
|
|||
self.distribution.create_conf_file(data, directory=self.install_lib)
|
||||
return self.get_conf_output()
|
||||
|
||||
def get_conf_output (self):
|
||||
def get_conf_output(self):
|
||||
"""Get name of configuration file."""
|
||||
return self.distribution.get_conf_filename(self.install_lib)
|
||||
|
||||
def get_outputs (self):
|
||||
def get_outputs(self):
|
||||
"""Add the generated config file to the list of outputs."""
|
||||
outs = super(MyInstallLib, self).get_outputs()
|
||||
conf_output = self.get_conf_output()
|
||||
|
|
@ -155,16 +155,16 @@ class MyInstallLib (install_lib):
|
|||
return outs
|
||||
|
||||
|
||||
class MyInstallData (install_data):
|
||||
class MyInstallData(install_data):
|
||||
"""Fix file permissions."""
|
||||
|
||||
def run (self):
|
||||
def run(self):
|
||||
"""Adjust permissions on POSIX systems."""
|
||||
self.install_translations()
|
||||
super(MyInstallData, self).run()
|
||||
self.fix_permissions()
|
||||
|
||||
def install_translations (self):
|
||||
def install_translations(self):
|
||||
"""Install compiled gettext catalogs."""
|
||||
# A hack to fix https://github.com/linkchecker/linkchecker/issues/102
|
||||
i18n_files = []
|
||||
|
|
@ -191,7 +191,7 @@ class MyInstallData (install_data):
|
|||
(out, _) = self.copy_file(data, dest)
|
||||
self.outfiles.append(out)
|
||||
|
||||
def fix_permissions (self):
|
||||
def fix_permissions(self):
|
||||
"""Set correct read permissions on POSIX systems. Might also
|
||||
be possible by setting umask?"""
|
||||
if os.name == 'posix' and not self.dry_run:
|
||||
|
|
@ -205,15 +205,15 @@ class MyInstallData (install_data):
|
|||
os.chmod(path, mode)
|
||||
|
||||
|
||||
class MyDistribution (Distribution):
|
||||
class MyDistribution(Distribution):
|
||||
"""Custom distribution class generating config file."""
|
||||
|
||||
def __init__ (self, attrs):
|
||||
def __init__(self, attrs):
|
||||
"""Set console and windows scripts."""
|
||||
super(MyDistribution, self).__init__(attrs)
|
||||
self.console = ['linkchecker']
|
||||
|
||||
def run_commands (self):
|
||||
def run_commands(self):
|
||||
"""Generate config file and run commands."""
|
||||
cwd = os.getcwd()
|
||||
data = []
|
||||
|
|
@ -223,11 +223,11 @@ class MyDistribution (Distribution):
|
|||
self.create_conf_file(data)
|
||||
super(MyDistribution, self).run_commands()
|
||||
|
||||
def get_conf_filename (self, directory):
|
||||
def get_conf_filename(self, directory):
|
||||
"""Get name for config file."""
|
||||
return os.path.join(directory, "_%s_configdata.py" % self.get_name())
|
||||
|
||||
def create_conf_file (self, data, directory=None):
|
||||
def create_conf_file(self, data, directory=None):
|
||||
"""Create local config file from given data (list of lines) in
|
||||
the directory (or current directory if not given)."""
|
||||
data.insert(0, "# this file is automatically created by setup.py")
|
||||
|
|
@ -253,7 +253,7 @@ class MyDistribution (Distribution):
|
|||
"creating %s" % filename, self.verbose >= 1, self.dry_run)
|
||||
|
||||
|
||||
def list_message_files (package, suffix=".mo"):
|
||||
def list_message_files(package, suffix=".mo"):
|
||||
"""Return list of all found message files and their installation paths."""
|
||||
for fname in glob.glob("po/*" + suffix):
|
||||
# basename (without extension) is a locale name
|
||||
|
|
@ -263,7 +263,7 @@ def list_message_files (package, suffix=".mo"):
|
|||
"share", "locale", localename, "LC_MESSAGES", domainname))
|
||||
|
||||
|
||||
def check_manifest ():
|
||||
def check_manifest():
|
||||
"""Snatched from roundup.sf.net.
|
||||
Check that the files listed in the MANIFEST are present when the
|
||||
source is unpacked."""
|
||||
|
|
@ -284,19 +284,19 @@ def check_manifest ():
|
|||
print('\nMissing: '.join(err))
|
||||
|
||||
|
||||
class MyBuild (build):
|
||||
class MyBuild(build):
|
||||
"""Custom build command."""
|
||||
|
||||
def run (self):
|
||||
def run(self):
|
||||
"""Check MANIFEST before building."""
|
||||
check_manifest()
|
||||
build.run(self)
|
||||
|
||||
|
||||
class MyClean (clean):
|
||||
class MyClean(clean):
|
||||
"""Custom clean command."""
|
||||
|
||||
def run (self):
|
||||
def run(self):
|
||||
"""Remove share directory on clean."""
|
||||
if self.all:
|
||||
# remove share directory
|
||||
|
|
@ -308,10 +308,10 @@ class MyClean (clean):
|
|||
clean.run(self)
|
||||
|
||||
|
||||
class MySdist (sdist):
|
||||
class MySdist(sdist):
|
||||
"""Custom sdist command."""
|
||||
|
||||
def get_file_list (self):
|
||||
def get_file_list(self):
|
||||
"""Add MANIFEST to the file list."""
|
||||
super(MySdist, self).get_file_list()
|
||||
self.filelist.append("MANIFEST")
|
||||
|
|
|
|||
|
|
@ -29,7 +29,7 @@ basedir = os.path.dirname(__file__)
|
|||
linkchecker_cmd = os.path.join(os.path.dirname(basedir), "linkchecker")
|
||||
|
||||
|
||||
def run (cmd, verbosity=0, **kwargs):
|
||||
def run(cmd, verbosity=0, **kwargs):
|
||||
"""Run command without error checking.
|
||||
@return: command return code"""
|
||||
if kwargs.get("shell"):
|
||||
|
|
@ -38,7 +38,7 @@ def run (cmd, verbosity=0, **kwargs):
|
|||
return subprocess.call(cmd, **kwargs)
|
||||
|
||||
|
||||
def run_checked (cmd, ret_ok=(0,), **kwargs):
|
||||
def run_checked(cmd, ret_ok=(0,), **kwargs):
|
||||
"""Run command and raise OSError on error."""
|
||||
retcode = run(cmd, **kwargs)
|
||||
if retcode not in ret_ok:
|
||||
|
|
@ -48,7 +48,7 @@ def run_checked (cmd, ret_ok=(0,), **kwargs):
|
|||
|
||||
|
||||
|
||||
def run_silent (cmd):
|
||||
def run_silent(cmd):
|
||||
"""Run given command without output."""
|
||||
null = open(os.name == 'nt' and ':NUL' or "/dev/null", 'w')
|
||||
try:
|
||||
|
|
@ -57,11 +57,11 @@ def run_silent (cmd):
|
|||
null.close()
|
||||
|
||||
|
||||
def _need_func (testfunc, name):
|
||||
def _need_func(testfunc, name):
|
||||
"""Decorator skipping test if given testfunc fails."""
|
||||
def check_func (func):
|
||||
def check_func(func):
|
||||
@wraps(func)
|
||||
def newfunc (*args, **kwargs):
|
||||
def newfunc(*args, **kwargs):
|
||||
if not testfunc():
|
||||
pytest.skip("%s is not available" % name)
|
||||
return func(*args, **kwargs)
|
||||
|
|
@ -70,7 +70,7 @@ def _need_func (testfunc, name):
|
|||
|
||||
|
||||
@lru_cache(1)
|
||||
def has_network ():
|
||||
def has_network():
|
||||
"""Test if network is up."""
|
||||
try:
|
||||
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
|
|
@ -85,7 +85,7 @@ need_network = _need_func(has_network, "network")
|
|||
|
||||
|
||||
@lru_cache(1)
|
||||
def has_msgfmt ():
|
||||
def has_msgfmt():
|
||||
"""Test if msgfmt is available."""
|
||||
return run_silent(["msgfmt", "-V"]) == 0
|
||||
|
||||
|
|
@ -93,7 +93,7 @@ need_msgfmt = _need_func(has_msgfmt, "msgfmt")
|
|||
|
||||
|
||||
@lru_cache(1)
|
||||
def has_posix ():
|
||||
def has_posix():
|
||||
"""Test if this is a POSIX system."""
|
||||
return os.name == "posix"
|
||||
|
||||
|
|
@ -101,7 +101,7 @@ need_posix = _need_func(has_posix, "POSIX system")
|
|||
|
||||
|
||||
@lru_cache(1)
|
||||
def has_windows ():
|
||||
def has_windows():
|
||||
"""Test if this is a Windows system."""
|
||||
return os.name == "nt"
|
||||
|
||||
|
|
@ -109,7 +109,7 @@ need_windows = _need_func(has_windows, "Windows system")
|
|||
|
||||
|
||||
@lru_cache(1)
|
||||
def has_linux ():
|
||||
def has_linux():
|
||||
"""Test if this is a Linux system."""
|
||||
return sys.platform.startswith("linux")
|
||||
|
||||
|
|
@ -117,7 +117,7 @@ need_linux = _need_func(has_linux, "Linux system")
|
|||
|
||||
|
||||
@lru_cache(1)
|
||||
def has_clamav ():
|
||||
def has_clamav():
|
||||
"""Test if ClamAV daemon is installed and running."""
|
||||
try:
|
||||
cmd = ["grep", "LocalSocket", "/etc/clamav/clamd.conf"]
|
||||
|
|
@ -135,7 +135,7 @@ need_clamav = _need_func(has_clamav, "ClamAV")
|
|||
|
||||
|
||||
@lru_cache(1)
|
||||
def has_proxy ():
|
||||
def has_proxy():
|
||||
"""Test if proxy is running on port 8081."""
|
||||
try:
|
||||
s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
|
||||
|
|
@ -149,7 +149,7 @@ need_proxy = _need_func(has_proxy, "proxy")
|
|||
|
||||
|
||||
@lru_cache(1)
|
||||
def has_pyftpdlib ():
|
||||
def has_pyftpdlib():
|
||||
"""Test if pyftpdlib is available."""
|
||||
try:
|
||||
import pyftpdlib
|
||||
|
|
@ -161,7 +161,7 @@ need_pyftpdlib = _need_func(has_pyftpdlib, "pyftpdlib")
|
|||
|
||||
|
||||
@lru_cache(1)
|
||||
def has_biplist ():
|
||||
def has_biplist():
|
||||
"""Test if biplist is available."""
|
||||
try:
|
||||
import biplist
|
||||
|
|
@ -173,7 +173,7 @@ need_biplist = _need_func(has_biplist, "biplist")
|
|||
|
||||
|
||||
@lru_cache(1)
|
||||
def has_newsserver (server):
|
||||
def has_newsserver(server):
|
||||
import nntplib
|
||||
try:
|
||||
nntp = nntplib.NNTP(server, usenetrc=False)
|
||||
|
|
@ -183,10 +183,10 @@ def has_newsserver (server):
|
|||
return False
|
||||
|
||||
|
||||
def need_newsserver (server):
|
||||
def need_newsserver(server):
|
||||
"""Decorator skipping test if newsserver is not available."""
|
||||
def check_func (func):
|
||||
def newfunc (*args, **kwargs):
|
||||
def check_func(func):
|
||||
def newfunc(*args, **kwargs):
|
||||
if not has_newsserver(server):
|
||||
pytest.skip("Newsserver `%s' is not available" % server)
|
||||
return func(*args, **kwargs)
|
||||
|
|
@ -197,7 +197,7 @@ def need_newsserver (server):
|
|||
|
||||
|
||||
@lru_cache(1)
|
||||
def has_x11 ():
|
||||
def has_x11():
|
||||
"""Test if DISPLAY variable is set."""
|
||||
return os.getenv('DISPLAY') is not None
|
||||
|
||||
|
|
@ -222,7 +222,7 @@ need_pdflib = _need_func(has_pdflib, 'pdflib')
|
|||
|
||||
|
||||
@contextmanager
|
||||
def _limit_time (seconds):
|
||||
def _limit_time(seconds):
|
||||
"""Raises LinkCheckerInterrupt if given number of seconds have passed."""
|
||||
if os.name == 'posix':
|
||||
def signal_handler(signum, frame):
|
||||
|
|
@ -237,10 +237,10 @@ def _limit_time (seconds):
|
|||
signal.signal(signal.SIGALRM, old_handler)
|
||||
|
||||
|
||||
def limit_time (seconds, skip=False):
|
||||
def limit_time(seconds, skip=False):
|
||||
"""Limit test time to the given number of seconds, else fail or skip."""
|
||||
def run_limited (func):
|
||||
def new_func (*args, **kwargs):
|
||||
def run_limited(func):
|
||||
def new_func(*args, **kwargs):
|
||||
try:
|
||||
with _limit_time(seconds):
|
||||
return func(*args, **kwargs)
|
||||
|
|
@ -253,7 +253,7 @@ def limit_time (seconds, skip=False):
|
|||
return run_limited
|
||||
|
||||
|
||||
def get_file (filename=None):
|
||||
def get_file(filename=None):
|
||||
"""
|
||||
Get file name located within 'data' directory.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ from builtins import str as str_text
|
|||
get_url_from = linkcheck.checker.get_url_from
|
||||
|
||||
|
||||
class TestLogger (linkcheck.logger._Logger):
|
||||
class TestLogger(linkcheck.logger._Logger):
|
||||
"""
|
||||
Output logger for automatic regression tests.
|
||||
"""
|
||||
|
|
@ -53,7 +53,7 @@ class TestLogger (linkcheck.logger._Logger):
|
|||
'url',
|
||||
]
|
||||
|
||||
def __init__ (self, **kwargs):
|
||||
def __init__(self, **kwargs):
|
||||
"""
|
||||
The kwargs must have "expected" keyword with the expected logger
|
||||
output lines.
|
||||
|
|
@ -81,13 +81,13 @@ class TestLogger (linkcheck.logger._Logger):
|
|||
flags=re.DOTALL | re.MULTILINE)
|
||||
if x])).splitlines()
|
||||
|
||||
def start_output (self):
|
||||
def start_output(self):
|
||||
"""
|
||||
Nothing to do here.
|
||||
"""
|
||||
pass
|
||||
|
||||
def log_url (self, url_data):
|
||||
def log_url(self, url_data):
|
||||
"""
|
||||
Append logger output to self.result.
|
||||
"""
|
||||
|
|
@ -131,7 +131,7 @@ class TestLogger (linkcheck.logger._Logger):
|
|||
# note: do not append url_data.result since this is
|
||||
# platform dependent
|
||||
|
||||
def end_output (self, linknumber=-1, **kwargs):
|
||||
def end_output(self, linknumber=-1, **kwargs):
|
||||
"""
|
||||
Stores differences between expected and result in self.diff.
|
||||
"""
|
||||
|
|
@ -147,11 +147,11 @@ class TestLogger (linkcheck.logger._Logger):
|
|||
self.diff.append(line)
|
||||
|
||||
|
||||
def get_file_url (filename):
|
||||
def get_file_url(filename):
|
||||
return re.sub("^([a-zA-Z]):", r"/\1|", filename.replace("\\", "/"))
|
||||
|
||||
|
||||
def add_fileoutput_config (config):
|
||||
def add_fileoutput_config(config):
|
||||
if os.name == 'posix':
|
||||
devnull = '/dev/null'
|
||||
elif os.name == 'nt':
|
||||
|
|
@ -165,7 +165,7 @@ def add_fileoutput_config (config):
|
|||
config['fileoutput'].append(logger)
|
||||
|
||||
|
||||
def get_test_aggregate (confargs, logargs, logger=TestLogger):
|
||||
def get_test_aggregate(confargs, logargs, logger=TestLogger):
|
||||
"""Initialize a test configuration object."""
|
||||
config = linkcheck.configuration.Configuration()
|
||||
config.logger_add(logger)
|
||||
|
|
@ -183,23 +183,23 @@ def get_test_aggregate (confargs, logargs, logger=TestLogger):
|
|||
return linkcheck.director.get_aggregate(config)
|
||||
|
||||
|
||||
class LinkCheckTest (unittest.TestCase):
|
||||
class LinkCheckTest(unittest.TestCase):
|
||||
"""
|
||||
Functional test class with ability to test local files.
|
||||
"""
|
||||
logger = TestLogger
|
||||
|
||||
def setUp (self):
|
||||
def setUp(self):
|
||||
"""Ensure the current locale setting is the default.
|
||||
Otherwise, warnings will get translated and will break tests."""
|
||||
super(LinkCheckTest, self).setUp()
|
||||
linkcheck.init_i18n(loc='C')
|
||||
|
||||
def norm (self, url, encoding="utf-8"):
|
||||
def norm(self, url, encoding="utf-8"):
|
||||
"""Helper function to norm a url."""
|
||||
return linkcheck.url.url_norm(url, encoding=encoding)[0]
|
||||
|
||||
def get_attrs (self, **kwargs):
|
||||
def get_attrs(self, **kwargs):
|
||||
"""Return current and data directory as dictionary.
|
||||
You can augment the dict with keyword attributes."""
|
||||
d = {
|
||||
|
|
@ -209,7 +209,7 @@ class LinkCheckTest (unittest.TestCase):
|
|||
d.update(kwargs)
|
||||
return d
|
||||
|
||||
def get_resultlines (self, filename):
|
||||
def get_resultlines(self, filename):
|
||||
"""
|
||||
Return contents of file, as list of lines without line endings,
|
||||
ignoring empty lines and lines starting with a hash sign (#).
|
||||
|
|
@ -230,7 +230,7 @@ class LinkCheckTest (unittest.TestCase):
|
|||
"""Get URL for given filename."""
|
||||
return get_file(filename)
|
||||
|
||||
def file_test (self, filename, confargs=None):
|
||||
def file_test(self, filename, confargs=None):
|
||||
"""Check <filename> with expected result in <filename>.result."""
|
||||
url = self.get_url(filename)
|
||||
if confargs is None:
|
||||
|
|
@ -249,7 +249,7 @@ class LinkCheckTest (unittest.TestCase):
|
|||
self.fail("%d internal errors occurred!"
|
||||
% logger.stats.internal_errors)
|
||||
|
||||
def direct (self, url, resultlines, parts=None, recursionlevel=0,
|
||||
def direct(self, url, resultlines, parts=None, recursionlevel=0,
|
||||
confargs=None, url_encoding=None):
|
||||
"""Check url with expected result."""
|
||||
assert isinstance(url, str_text), repr(url)
|
||||
|
|
@ -273,18 +273,18 @@ class LinkCheckTest (unittest.TestCase):
|
|||
self.fail(os.linesep.join(l))
|
||||
|
||||
|
||||
class MailTest (LinkCheckTest):
|
||||
class MailTest(LinkCheckTest):
|
||||
"""Test mailto: link checking."""
|
||||
|
||||
def mail_valid (self, addr, **kwargs):
|
||||
def mail_valid(self, addr, **kwargs):
|
||||
"""Test valid mail address."""
|
||||
return self.mail_test(addr, "valid", **kwargs)
|
||||
|
||||
def mail_error (self, addr, **kwargs):
|
||||
def mail_error(self, addr, **kwargs):
|
||||
"""Test error mail address."""
|
||||
return self.mail_test(addr, "error", **kwargs)
|
||||
|
||||
def mail_test (self, addr, result, encoding="utf-8", cache_key=None, warning=None):
|
||||
def mail_test(self, addr, result, encoding="utf-8", cache_key=None, warning=None):
|
||||
"""Test mail address."""
|
||||
url = self.norm(addr, encoding=encoding)
|
||||
if cache_key is None:
|
||||
|
|
|
|||
|
|
@ -26,21 +26,21 @@ from . import LinkCheckTest
|
|||
|
||||
TIMEOUT = 5
|
||||
|
||||
class FtpServerTest (LinkCheckTest):
|
||||
class FtpServerTest(LinkCheckTest):
|
||||
"""Start/stop an FTP server that can be used for testing."""
|
||||
|
||||
def __init__ (self, methodName='runTest'):
|
||||
def __init__(self, methodName='runTest'):
|
||||
"""Init test class and store default ftp server port."""
|
||||
super(FtpServerTest, self).__init__(methodName=methodName)
|
||||
self.host = 'localhost'
|
||||
self.port = None
|
||||
|
||||
def setUp (self):
|
||||
def setUp(self):
|
||||
"""Start a new FTP server in a new thread."""
|
||||
self.port = start_server(self.host, 0)
|
||||
self.assertFalse(self.port is None)
|
||||
|
||||
def tearDown (self):
|
||||
def tearDown(self):
|
||||
"""Send stop request to server."""
|
||||
try:
|
||||
stop_server(self.host, self.port)
|
||||
|
|
@ -48,7 +48,7 @@ class FtpServerTest (LinkCheckTest):
|
|||
pass
|
||||
|
||||
|
||||
def start_server (host, port):
|
||||
def start_server(host, port):
|
||||
def line_logger(self, msg):
|
||||
if "kill" in msg:
|
||||
raise KeyboardInterrupt()
|
||||
|
|
@ -95,7 +95,7 @@ def start_server (host, port):
|
|||
return port
|
||||
|
||||
|
||||
def stop_server (host, port):
|
||||
def stop_server(host, port):
|
||||
"""Stop a running FTP server."""
|
||||
ftp = FTP()
|
||||
ftp.connect(host, port, TIMEOUT)
|
||||
|
|
|
|||
|
|
@ -30,12 +30,12 @@ from . import LinkCheckTest
|
|||
from .. import get_file
|
||||
|
||||
|
||||
class StoppableHttpRequestHandler (SimpleHTTPRequestHandler):
|
||||
class StoppableHttpRequestHandler(SimpleHTTPRequestHandler):
|
||||
"""
|
||||
HTTP request handler with QUIT stopping the server.
|
||||
"""
|
||||
|
||||
def do_QUIT (self):
|
||||
def do_QUIT(self):
|
||||
"""
|
||||
Send 200 OK response, and set server.stop to True.
|
||||
"""
|
||||
|
|
@ -43,7 +43,7 @@ class StoppableHttpRequestHandler (SimpleHTTPRequestHandler):
|
|||
self.end_headers()
|
||||
self.server.stop = True
|
||||
|
||||
def log_message (self, format, *args):
|
||||
def log_message(self, format, *args):
|
||||
"""
|
||||
Logging is disabled.
|
||||
"""
|
||||
|
|
@ -55,12 +55,12 @@ StoppableHttpRequestHandler.extensions_map.update({
|
|||
})
|
||||
|
||||
|
||||
class StoppableHttpServer (HTTPServer):
|
||||
class StoppableHttpServer(HTTPServer):
|
||||
"""
|
||||
HTTP server that reacts to self.stop flag.
|
||||
"""
|
||||
|
||||
def serve_forever (self):
|
||||
def serve_forever(self):
|
||||
"""
|
||||
Handle one request at a time until stopped.
|
||||
"""
|
||||
|
|
@ -69,13 +69,13 @@ class StoppableHttpServer (HTTPServer):
|
|||
self.handle_request()
|
||||
|
||||
|
||||
class NoQueryHttpRequestHandler (StoppableHttpRequestHandler):
|
||||
class NoQueryHttpRequestHandler(StoppableHttpRequestHandler):
|
||||
"""
|
||||
Handler ignoring the query part of requests and sending dummy directory
|
||||
listings.
|
||||
"""
|
||||
|
||||
def remove_path_query (self):
|
||||
def remove_path_query(self):
|
||||
"""
|
||||
Remove everything after a question mark.
|
||||
"""
|
||||
|
|
@ -90,7 +90,7 @@ class NoQueryHttpRequestHandler (StoppableHttpRequestHandler):
|
|||
return status
|
||||
return 500
|
||||
|
||||
def do_GET (self):
|
||||
def do_GET(self):
|
||||
"""
|
||||
Removes query part of GET request.
|
||||
"""
|
||||
|
|
@ -104,7 +104,7 @@ class NoQueryHttpRequestHandler (StoppableHttpRequestHandler):
|
|||
else:
|
||||
super(NoQueryHttpRequestHandler, self).do_GET()
|
||||
|
||||
def do_HEAD (self):
|
||||
def do_HEAD(self):
|
||||
"""
|
||||
Removes query part of HEAD request.
|
||||
"""
|
||||
|
|
@ -147,12 +147,12 @@ class NoQueryHttpRequestHandler (StoppableHttpRequestHandler):
|
|||
return f
|
||||
|
||||
|
||||
class HttpServerTest (LinkCheckTest):
|
||||
class HttpServerTest(LinkCheckTest):
|
||||
"""
|
||||
Start/stop an HTTP server that can be used for testing.
|
||||
"""
|
||||
|
||||
def __init__ (self, methodName='runTest'):
|
||||
def __init__(self, methodName='runTest'):
|
||||
"""
|
||||
Init test class and store default http server port.
|
||||
"""
|
||||
|
|
@ -193,7 +193,7 @@ class HttpsServerTest(HttpServerTest):
|
|||
return "https://localhost:%d/tests/checker/data/%s" % (self.port, filename)
|
||||
|
||||
|
||||
def start_server (handler, https=False):
|
||||
def start_server(handler, https=False):
|
||||
"""Start an HTTP server thread and return its port number."""
|
||||
server_address = ('localhost', 0)
|
||||
handler.protocol_version = "HTTP/1.0"
|
||||
|
|
@ -221,7 +221,7 @@ def start_server (handler, https=False):
|
|||
return port
|
||||
|
||||
|
||||
def stop_server (port, https=False):
|
||||
def stop_server(port, https=False):
|
||||
"""Stop an HTTP server thread."""
|
||||
if https:
|
||||
conn = HTTPSConnection("localhost:%d" % port,
|
||||
|
|
@ -232,7 +232,7 @@ def stop_server (port, https=False):
|
|||
conn.getresponse()
|
||||
|
||||
|
||||
def get_cookie (maxage=2000):
|
||||
def get_cookie(maxage=2000):
|
||||
data = (
|
||||
("Comment", "justatest"),
|
||||
("Max-Age", "%d" % maxage),
|
||||
|
|
@ -243,30 +243,30 @@ def get_cookie (maxage=2000):
|
|||
return "; ".join('%s="%s"' % (key, value) for key, value in data)
|
||||
|
||||
|
||||
class CookieRedirectHttpRequestHandler (NoQueryHttpRequestHandler):
|
||||
class CookieRedirectHttpRequestHandler(NoQueryHttpRequestHandler):
|
||||
"""Handler redirecting certain requests, and setting cookies."""
|
||||
|
||||
def end_headers (self):
|
||||
def end_headers(self):
|
||||
"""Send cookie before ending headers."""
|
||||
self.send_header("Set-Cookie", get_cookie())
|
||||
self.send_header("Set-Cookie", get_cookie(maxage=0))
|
||||
super(CookieRedirectHttpRequestHandler, self).end_headers()
|
||||
|
||||
def redirect (self):
|
||||
def redirect(self):
|
||||
"""Redirect request."""
|
||||
path = self.path.replace("redirect", "newurl")
|
||||
self.send_response(302)
|
||||
self.send_header("Location", path)
|
||||
self.end_headers()
|
||||
|
||||
def redirect_newhost (self):
|
||||
def redirect_newhost(self):
|
||||
"""Redirect request to a new host."""
|
||||
path = "http://www.example.com/"
|
||||
self.send_response(302)
|
||||
self.send_header("Location", path)
|
||||
self.end_headers()
|
||||
|
||||
def redirect_newscheme (self):
|
||||
def redirect_newscheme(self):
|
||||
"""Redirect request to a new scheme."""
|
||||
if "file" in self.path:
|
||||
path = "file:README.md"
|
||||
|
|
@ -276,7 +276,7 @@ class CookieRedirectHttpRequestHandler (NoQueryHttpRequestHandler):
|
|||
self.send_header("Location", path)
|
||||
self.end_headers()
|
||||
|
||||
def do_GET (self):
|
||||
def do_GET(self):
|
||||
"""Handle redirections for GET."""
|
||||
if "redirect_newscheme" in self.path:
|
||||
self.redirect_newscheme()
|
||||
|
|
@ -287,7 +287,7 @@ class CookieRedirectHttpRequestHandler (NoQueryHttpRequestHandler):
|
|||
else:
|
||||
super(CookieRedirectHttpRequestHandler, self).do_GET()
|
||||
|
||||
def do_HEAD (self):
|
||||
def do_HEAD(self):
|
||||
"""Handle redirections for HEAD."""
|
||||
if "redirect_newscheme" in self.path:
|
||||
self.redirect_newscheme()
|
||||
|
|
|
|||
|
|
@ -25,10 +25,10 @@ from . import LinkCheckTest
|
|||
|
||||
TIMEOUT = 5
|
||||
|
||||
class TelnetServerTest (LinkCheckTest):
|
||||
class TelnetServerTest(LinkCheckTest):
|
||||
"""Start/stop a Telnet server that can be used for testing."""
|
||||
|
||||
def __init__ (self, methodName='runTest'):
|
||||
def __init__(self, methodName='runTest'):
|
||||
"""Init test class and store default ftp server port."""
|
||||
super(TelnetServerTest, self).__init__(methodName=methodName)
|
||||
self.host = 'localhost'
|
||||
|
|
@ -46,7 +46,7 @@ class TelnetServerTest (LinkCheckTest):
|
|||
netloc = self.host
|
||||
return "telnet://%s:%d" % (netloc, self.port)
|
||||
|
||||
def setUp (self):
|
||||
def setUp(self):
|
||||
"""Start a new Telnet server in a new thread."""
|
||||
self.port, self.server_thread = start_server(self.host, 0, self.stop_event)
|
||||
self.assertFalse(self.port is None)
|
||||
|
|
@ -59,7 +59,7 @@ class TelnetServerTest (LinkCheckTest):
|
|||
assert not self.server_thread.is_alive()
|
||||
|
||||
|
||||
def start_server (host, port, stop_event):
|
||||
def start_server(host, port, stop_event):
|
||||
# Instantiate Telnet server class and listen to host:port
|
||||
clients = []
|
||||
def on_connect(client):
|
||||
|
|
|
|||
|
|
@ -19,12 +19,12 @@ Test html anchor parsing and checking.
|
|||
from . import LinkCheckTest
|
||||
|
||||
|
||||
class TestAnchor (LinkCheckTest):
|
||||
class TestAnchor(LinkCheckTest):
|
||||
"""
|
||||
Test anchor checking of HTML pages.
|
||||
"""
|
||||
|
||||
def test_anchor (self):
|
||||
def test_anchor(self):
|
||||
confargs = {"enabledplugins": ["AnchorCheck"]}
|
||||
url = "file://%(curdir)s/%(datadir)s/anchor.html" % self.get_attrs()
|
||||
nurl = self.norm(url)
|
||||
|
|
|
|||
|
|
@ -19,19 +19,19 @@ Test html <base> tag parsing.
|
|||
from . import LinkCheckTest
|
||||
|
||||
|
||||
class TestBase (LinkCheckTest):
|
||||
class TestBase(LinkCheckTest):
|
||||
"""
|
||||
Test links of base*.html files.
|
||||
"""
|
||||
|
||||
def test_base1 (self):
|
||||
def test_base1(self):
|
||||
self.file_test("base1.html")
|
||||
|
||||
def test_base2 (self):
|
||||
def test_base2(self):
|
||||
self.file_test("base2.html")
|
||||
|
||||
def test_base3 (self):
|
||||
def test_base3(self):
|
||||
self.file_test("base3.html")
|
||||
|
||||
def test_base4 (self):
|
||||
def test_base4(self):
|
||||
self.file_test("base4.html")
|
||||
|
|
|
|||
|
|
@ -21,33 +21,33 @@ from .. import need_network, need_biplist
|
|||
import os
|
||||
|
||||
|
||||
class TestBookmarks (LinkCheckTest):
|
||||
class TestBookmarks(LinkCheckTest):
|
||||
"""
|
||||
Test bookmark link checking and content parsing.
|
||||
"""
|
||||
|
||||
@need_network
|
||||
def test_firefox_bookmarks (self):
|
||||
def test_firefox_bookmarks(self):
|
||||
# firefox 3 bookmark file parsing
|
||||
self.file_test("places.sqlite")
|
||||
|
||||
@need_network
|
||||
def test_opera_bookmarks (self):
|
||||
def test_opera_bookmarks(self):
|
||||
# Opera bookmark file parsing
|
||||
self.file_test("opera6.adr")
|
||||
|
||||
@need_network
|
||||
def test_chromium_bookmarks (self):
|
||||
def test_chromium_bookmarks(self):
|
||||
# Chromium and Google Chrome bookmark file parsing
|
||||
self.file_test("Bookmarks")
|
||||
|
||||
@need_network
|
||||
def test_safari_bookmarks_xml (self):
|
||||
def test_safari_bookmarks_xml(self):
|
||||
# Safari bookmark file parsing (for plaintext plist files)
|
||||
self.file_test(os.path.join("plist_xml", "Bookmarks.plist"))
|
||||
|
||||
@need_network
|
||||
@need_biplist
|
||||
def test_safari_bookmarks_binary (self):
|
||||
def test_safari_bookmarks_binary(self):
|
||||
# Safari bookmark file parsing (for binary plist files)
|
||||
self.file_test(os.path.join("plist_binary", "Bookmarks.plist"))
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ Test html <base> tag parsing.
|
|||
from . import LinkCheckTest
|
||||
|
||||
|
||||
class TestBase (LinkCheckTest):
|
||||
class TestBase(LinkCheckTest):
|
||||
"""
|
||||
Test, if charset encoding is done right.
|
||||
The linkchecker should translate the encoding
|
||||
|
|
|
|||
|
|
@ -20,12 +20,12 @@ Test error checking.
|
|||
from . import LinkCheckTest
|
||||
|
||||
|
||||
class TestError (LinkCheckTest):
|
||||
class TestError(LinkCheckTest):
|
||||
"""
|
||||
Test unrecognized or syntactically wrong links.
|
||||
"""
|
||||
|
||||
def test_unrecognized (self):
|
||||
def test_unrecognized(self):
|
||||
# Unrecognized scheme
|
||||
url = "hutzli:"
|
||||
attrs = self.get_attrs(url=url)
|
||||
|
|
@ -38,7 +38,7 @@ class TestError (LinkCheckTest):
|
|||
]
|
||||
self.direct(url, resultlines)
|
||||
|
||||
def test_invalid1 (self):
|
||||
def test_invalid1(self):
|
||||
# invalid scheme chars
|
||||
url = "äöü:"
|
||||
attrs = self.get_attrs(url=url)
|
||||
|
|
@ -52,7 +52,7 @@ class TestError (LinkCheckTest):
|
|||
]
|
||||
self.direct(url, resultlines)
|
||||
|
||||
def test_invalid2 (self):
|
||||
def test_invalid2(self):
|
||||
# missing scheme alltogether
|
||||
url = "äöü"
|
||||
attrs = self.get_attrs(url=url)
|
||||
|
|
@ -66,7 +66,7 @@ class TestError (LinkCheckTest):
|
|||
]
|
||||
self.direct(url, resultlines)
|
||||
|
||||
def test_invalid3 (self):
|
||||
def test_invalid3(self):
|
||||
# really fucked up
|
||||
url = "@³²¼][½ ³@] ¬½"
|
||||
attrs = self.get_attrs(url=url)
|
||||
|
|
|
|||
|
|
@ -26,7 +26,7 @@ from tests import need_word, need_pdflib
|
|||
from . import LinkCheckTest, get_file
|
||||
|
||||
|
||||
def unzip (filename, targetdir):
|
||||
def unzip(filename, targetdir):
|
||||
"""Unzip given zipfile into targetdir."""
|
||||
if isinstance(targetdir, unicode):
|
||||
targetdir = str(targetdir)
|
||||
|
|
@ -42,34 +42,34 @@ def unzip (filename, targetdir):
|
|||
outfile.close()
|
||||
|
||||
|
||||
class TestFile (LinkCheckTest):
|
||||
class TestFile(LinkCheckTest):
|
||||
"""
|
||||
Test file:// link checking (and file content parsing).
|
||||
"""
|
||||
|
||||
def test_html (self):
|
||||
def test_html(self):
|
||||
self.file_test("file.html")
|
||||
|
||||
def test_html_url_quote (self):
|
||||
def test_html_url_quote(self):
|
||||
self.file_test("file_url_quote.html")
|
||||
|
||||
def test_wml (self):
|
||||
def test_wml(self):
|
||||
self.file_test("file.wml")
|
||||
|
||||
def test_text (self):
|
||||
def test_text(self):
|
||||
self.file_test("file.txt")
|
||||
|
||||
def test_asc (self):
|
||||
def test_asc(self):
|
||||
self.file_test("file.asc")
|
||||
|
||||
def test_css (self):
|
||||
def test_css(self):
|
||||
self.file_test("file.css")
|
||||
|
||||
def test_php (self):
|
||||
def test_php(self):
|
||||
self.file_test("file.php")
|
||||
|
||||
@need_word
|
||||
def test_word (self):
|
||||
def test_word(self):
|
||||
confargs = dict(enabledplugins=["WordParser"])
|
||||
self.file_test("file.doc", confargs=confargs)
|
||||
|
||||
|
|
@ -82,11 +82,11 @@ class TestFile (LinkCheckTest):
|
|||
confargs = dict(enabledplugins=["MarkdownCheck"])
|
||||
self.file_test("file.markdown", confargs=confargs)
|
||||
|
||||
def test_urllist (self):
|
||||
def test_urllist(self):
|
||||
self.file_test("urllist.txt")
|
||||
|
||||
@pytest.mark.xfail
|
||||
def test_directory_listing (self):
|
||||
def test_directory_listing(self):
|
||||
# unpack non-unicode filename which cannot be stored
|
||||
# in the SF subversion repository
|
||||
if os.name != 'posix' or sys.platform != 'linux2':
|
||||
|
|
@ -96,11 +96,11 @@ class TestFile (LinkCheckTest):
|
|||
unzip(dirname + ".zip", os.path.dirname(dirname))
|
||||
self.file_test("dir")
|
||||
|
||||
def test_unicode_filename (self):
|
||||
def test_unicode_filename(self):
|
||||
# a unicode filename
|
||||
self.file_test("Мошкова.bin")
|
||||
|
||||
def test_good_file (self):
|
||||
def test_good_file(self):
|
||||
url = "file://%(curdir)s/%(datadir)s/file.txt" % self.get_attrs()
|
||||
nurl = self.norm(url)
|
||||
resultlines = [
|
||||
|
|
@ -111,7 +111,7 @@ class TestFile (LinkCheckTest):
|
|||
]
|
||||
self.direct(url, resultlines)
|
||||
|
||||
def test_bad_file (self):
|
||||
def test_bad_file(self):
|
||||
if os.name == 'nt':
|
||||
# Fails on NT platforms and I am too lazy to fix
|
||||
# Cause: url get quoted %7C which gets lowercased to
|
||||
|
|
@ -127,7 +127,7 @@ class TestFile (LinkCheckTest):
|
|||
]
|
||||
self.direct(url, resultlines)
|
||||
|
||||
def test_good_file_missing_dslash (self):
|
||||
def test_good_file_missing_dslash(self):
|
||||
# good file (missing double slash)
|
||||
attrs = self.get_attrs()
|
||||
url = "file:%(curdir)s/%(datadir)s/file.txt" % attrs
|
||||
|
|
@ -139,7 +139,7 @@ class TestFile (LinkCheckTest):
|
|||
]
|
||||
self.direct(url, resultlines)
|
||||
|
||||
def test_good_dir (self):
|
||||
def test_good_dir(self):
|
||||
url = "file://%(curdir)s/%(datadir)s/" % self.get_attrs()
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
|
|
@ -149,7 +149,7 @@ class TestFile (LinkCheckTest):
|
|||
]
|
||||
self.direct(url, resultlines)
|
||||
|
||||
def test_good_dir_space (self):
|
||||
def test_good_dir_space(self):
|
||||
url = "file://%(curdir)s/%(datadir)s/a b/" % self.get_attrs()
|
||||
nurl = self.norm(url)
|
||||
url2 = "file://%(curdir)s/%(datadir)s/a b/el.html" % self.get_attrs()
|
||||
|
|
|
|||
|
|
@ -19,10 +19,10 @@ Test html <frame> tag parsing.
|
|||
from . import LinkCheckTest
|
||||
|
||||
|
||||
class TestFrames (LinkCheckTest):
|
||||
class TestFrames(LinkCheckTest):
|
||||
"""
|
||||
Test link checking of HTML framesets.
|
||||
"""
|
||||
|
||||
def test_frames (self):
|
||||
def test_frames(self):
|
||||
self.file_test("frames.html")
|
||||
|
|
|
|||
|
|
@ -20,11 +20,11 @@ from .. import need_pyftpdlib
|
|||
from .ftpserver import FtpServerTest
|
||||
|
||||
|
||||
class TestFtp (FtpServerTest):
|
||||
class TestFtp(FtpServerTest):
|
||||
"""Test ftp: link checking."""
|
||||
|
||||
@need_pyftpdlib
|
||||
def test_ftp (self):
|
||||
def test_ftp(self):
|
||||
# ftp two slashes
|
||||
url = "ftp://%s:%d/" % (self.host, self.port)
|
||||
resultlines = [
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ import pytest
|
|||
from tests import need_network
|
||||
from .httpserver import HttpServerTest, CookieRedirectHttpRequestHandler
|
||||
|
||||
class TestHttp (HttpServerTest):
|
||||
class TestHttp(HttpServerTest):
|
||||
"""Test http:// link checking."""
|
||||
|
||||
def __init__(self, methodName='runTest'):
|
||||
|
|
@ -30,7 +30,7 @@ class TestHttp (HttpServerTest):
|
|||
self.handler = CookieRedirectHttpRequestHandler
|
||||
|
||||
@need_network
|
||||
def test_html (self):
|
||||
def test_html(self):
|
||||
confargs = dict(recursionlevel=1)
|
||||
self.file_test("http.html", confargs=confargs)
|
||||
self.file_test("http_lowercase.html", confargs=confargs)
|
||||
|
|
|
|||
|
|
@ -19,15 +19,15 @@ Test http checking.
|
|||
from .httpserver import HttpServerTest
|
||||
from tests import need_network
|
||||
|
||||
class TestHttpMisc (HttpServerTest):
|
||||
class TestHttpMisc(HttpServerTest):
|
||||
"""Test http:// misc link checking."""
|
||||
|
||||
@need_network
|
||||
def test_html (self):
|
||||
def test_html(self):
|
||||
self.swf_test()
|
||||
self.file_test("sitemap.xml")
|
||||
|
||||
def swf_test (self):
|
||||
def swf_test(self):
|
||||
url = self.get_url("test.swf")
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ Test http checking.
|
|||
from tests import need_network
|
||||
from .httpserver import HttpServerTest, CookieRedirectHttpRequestHandler
|
||||
|
||||
class TestHttpRedirect (HttpServerTest):
|
||||
class TestHttpRedirect(HttpServerTest):
|
||||
"""Test http:// link redirection checking."""
|
||||
|
||||
def __init__(self, methodName='runTest'):
|
||||
|
|
@ -27,14 +27,14 @@ class TestHttpRedirect (HttpServerTest):
|
|||
self.handler = CookieRedirectHttpRequestHandler
|
||||
|
||||
@need_network
|
||||
def test_redirect (self):
|
||||
def test_redirect(self):
|
||||
self.redirect1()
|
||||
self.redirect2()
|
||||
self.redirect3()
|
||||
self.redirect4()
|
||||
self.redirect5()
|
||||
|
||||
def redirect1 (self):
|
||||
def redirect1(self):
|
||||
url = "http://localhost:%d/redirect1" % self.port
|
||||
nurl = url
|
||||
rurl = url.replace("redirect", "newurl")
|
||||
|
|
@ -47,7 +47,7 @@ class TestHttpRedirect (HttpServerTest):
|
|||
]
|
||||
self.direct(url, resultlines, recursionlevel=0)
|
||||
|
||||
def redirect2 (self):
|
||||
def redirect2(self):
|
||||
url = "http://localhost:%d/tests/checker/data/redirect.html" % \
|
||||
self.port
|
||||
nurl = url
|
||||
|
|
@ -61,12 +61,12 @@ class TestHttpRedirect (HttpServerTest):
|
|||
]
|
||||
self.direct(url, resultlines, recursionlevel=99)
|
||||
|
||||
def redirect3 (self):
|
||||
def redirect3(self):
|
||||
url = "http://localhost:%d/tests/checker/data/redir.html" % self.port
|
||||
resultlines = self.get_resultlines("redir.html")
|
||||
self.direct(url, resultlines, recursionlevel=1)
|
||||
|
||||
def redirect4 (self):
|
||||
def redirect4(self):
|
||||
url = "http://localhost:%d/redirect_newscheme_ftp" % self.port
|
||||
nurl = url
|
||||
#rurl = "ftp://example.com/"
|
||||
|
|
@ -84,7 +84,7 @@ class TestHttpRedirect (HttpServerTest):
|
|||
]
|
||||
self.direct(url, resultlines, recursionlevel=99)
|
||||
|
||||
def redirect5 (self):
|
||||
def redirect5(self):
|
||||
url = "http://localhost:%d/redirect_newscheme_file" % self.port
|
||||
nurl = url
|
||||
#rurl = "file:README"
|
||||
|
|
|
|||
|
|
@ -18,14 +18,14 @@ Test http checking.
|
|||
"""
|
||||
from .httpserver import HttpServerTest
|
||||
|
||||
class TestHttpRobots (HttpServerTest):
|
||||
class TestHttpRobots(HttpServerTest):
|
||||
"""Test robots.txt link checking behaviour."""
|
||||
|
||||
def test_html (self):
|
||||
def test_html(self):
|
||||
self.robots_txt_test()
|
||||
self.robots_txt2_test()
|
||||
|
||||
def robots_txt_test (self):
|
||||
def robots_txt_test(self):
|
||||
url = "http://localhost:%d/robots.txt" % self.port
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
|
|
@ -35,7 +35,7 @@ class TestHttpRobots (HttpServerTest):
|
|||
]
|
||||
self.direct(url, resultlines, recursionlevel=5)
|
||||
|
||||
def robots_txt2_test (self):
|
||||
def robots_txt2_test(self):
|
||||
url = "http://localhost:%d/secret" % self.port
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ class TestHttps(HttpsServerTest):
|
|||
f.write(crypto.dump_certificate(crypto.FILETYPE_PEM, cert))
|
||||
|
||||
@need_network
|
||||
def test_https (self):
|
||||
def test_https(self):
|
||||
url = self.get_url("")
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
|
|
|
|||
Some files were not shown because too many files have changed in this diff Show more
Loading…
Reference in a new issue