Refactor output loggers.

This commit is contained in:
Bastian Kleineidam 2013-12-11 18:41:55 +01:00
parent b98fab331a
commit 5736987b60
23 changed files with 294 additions and 176 deletions

View file

@ -98,17 +98,21 @@ A: Use the `--check-html` and `--check-css` options.
**Q: I want to have my own logging class. How can I use it in LinkChecker?**
A: A Python API lets you define new logging classes.
Define your own logging class as a subclass of StandardLogger or any other
Define your own logging class as a subclass of _Logger or any other
logging class in the log module.
Then call the addLogger function in Config.Configuration to register
Then call the add_logger function in Config.Configuration to register
your new Logger.
After this, append a new logger instance to the fileoutput list.
```python
import linkcheck, MyLogger
log_format = 'mylog'
log_args = {'fileoutput': log_format, 'filename': 'foo.txt'}
import linkcheck
class MyLogger(linkcheck.logger._Logger):
LoggerName = 'mylog'
LoggerArgs = {'fileoutput': log_format, 'filename': 'foo.txt'}
# ...
cfg = linkcheck.configuration.Configuration()
cfg.logger_add(log_format, MyLogger.MyLogger)
cfg['fileoutput'].append(cfg.logger_new(log_format, log_args))
cfg.logger_add(MyLogger)
cfg['fileoutput'].append(cfg.logger_new(MyLogger.LoggerName))
```

View file

@ -153,63 +153,6 @@ class Configuration (dict):
self["status"] = False
self["status_wait_seconds"] = 5
self["fileoutput"] = []
# Logger configurations
self["text"] = {
"filename": "linkchecker-out.txt",
'colorparent': "default",
'colorurl': "default",
'colorname': "default",
'colorreal': "cyan",
'colorbase': "purple",
'colorvalid': "bold;green",
'colorinvalid': "bold;red",
'colorinfo': "default",
'colorwarning': "bold;yellow",
'colordltime': "default",
'colordlsize': "default",
'colorreset': "default",
}
self['html'] = {
"filename": "linkchecker-out.html",
'colorbackground': '#fff7e5',
'colorurl': '#dcd5cf',
'colorborder': '#000000',
'colorlink': '#191c83',
'colorwarning': '#e0954e',
'colorerror': '#db4930',
'colorok': '#3ba557',
}
self['gml'] = {
"filename": "linkchecker-out.gml",
}
self['sql'] = {
"filename": "linkchecker-out.sql",
'separator': ';',
'dbname': 'linksdb',
}
self['csv'] = {
"filename": "linkchecker-out.csv",
'separator': ';',
"quotechar": '"',
}
self['blacklist'] = {
"filename": "~/.linkchecker/blacklist",
}
self['xml'] = {
"filename": "linkchecker-out.xml",
}
self['gxml'] = {
"filename": "linkchecker-out.gxml",
}
self['dot'] = {
"filename": "linkchecker-out.dot",
"encoding": "ascii",
}
self['sitemap'] = {
"filename": "linkchecker-out.sitemap.xml",
"encoding": "utf-8",
}
self['none'] = {}
self['output'] = 'text'
self['logger'] = None
self["warningregex"] = None
@ -232,8 +175,12 @@ class Configuration (dict):
self["maxconnectionshttp"] = 10
self["maxconnectionshttps"] = 10
self["maxconnectionsftp"] = 2
from ..logger import Loggers
self.loggers = dict(**Loggers)
self.loggers = {}
from ..logger import LoggerClasses
for c in LoggerClasses:
key = c.LoggerName
self[key] = {}
self.loggers[key] = c
def init_logging (self, status_logger, debug=None, handler=None):
"""
@ -286,23 +233,13 @@ class Configuration (dict):
for key in loggers:
logging.getLogger(lognames[key]).setLevel(level)
def logger_new (self, loggertype, **kwargs):
"""
Instantiate new logger and return it.
"""
args = {}
args.update(self[loggertype])
args.update(kwargs)
return self.loggers[loggertype](**args)
def logger_new (self, loggername, **kwargs):
"""Instantiate new logger and return it."""
return self.loggers[loggername](**kwargs)
def logger_add (self, loggertype, loggerclass, loggerargs=None):
"""
Add a new logger type to the known loggers.
"""
if loggerargs is None:
loggerargs = {}
self.loggers[loggertype] = loggerclass
self[loggertype] = loggerargs
def logger_add (self, loggerclass):
"""Add a new logger type to the known loggers."""
self.loggers[loggerclass.LoggerName] = loggerclass
def read (self, files=None):
"""

View file

@ -91,8 +91,9 @@ class LCConfigParser (ConfigParser.RawConfigParser, object):
def read_output_config (self):
"""Read configuration options in section "output"."""
section = "output"
from ..logger import Loggers
for key in Loggers.keys():
from ..logger import LoggerClasses
for c in LoggerClasses:
key = c.LoggerName
if self.has_section(key):
for opt in self.options(key):
self.config[key][opt] = self.get(key, opt)
@ -127,7 +128,8 @@ class LCConfigParser (ConfigParser.RawConfigParser, object):
# strip names from whitespace
loggers = (x.strip().lower() for x in loggers)
# no file output for the blacklist and none Logger
loggers = (x for x in loggers if x in Loggers and
from ..logger import LoggerNames
loggers = (x for x in loggers if x in LoggerNames and
x not in ("blacklist", "none"))
for val in loggers:
output = self.config.logger_new(val, fileoutput=1)

View file

@ -221,8 +221,8 @@ class LinkCheckerMain (QtGui.QMainWindow, Ui_MainWindow):
# dictionary holding overwritten values
self.config_backup = {}
# set standard GUI configuration values
self.config.logger_add("gui", SignalLogger)
self.config["logger"] = self.config.logger_new('gui',
self.config.logger_add(SignalLogger)
self.config["logger"] = self.config.logger_new(SignalLogger.LoggerName,
signal=self.log_url_signal, stats=self.log_stats_signal)
self.config["status"] = True
self.config["status_wait_seconds"] = 2

View file

@ -16,7 +16,7 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
from logging import Handler
from ..logger import Logger
from ..logger import _Logger
class GuiLogHandler (Handler, object):
@ -32,9 +32,11 @@ class GuiLogHandler (Handler, object):
self.signal.emit(self.format(record))
class SignalLogger (Logger):
class SignalLogger (_Logger):
"""Use Qt signals for logged URLs and statistics."""
LoggerName = "gui"
def __init__ (self, **args):
"""Store signals for URL and statistic data."""
super(SignalLogger, self).__init__(**args)

91
linkcheck/loader.py Normal file
View file

@ -0,0 +1,91 @@
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2012-2013 Bastian Kleineidam
"""
Functions to load plugin modules.
Example usage:
modules = loader.get_modules('plugins')
plugins = loader.get_plugins(modules, PluginClass)
"""
from __future__ import print_function
import os
import sys
import zipfile
import importlib
def is_frozen ():
    """Tell whether the sys module carries a "frozen" attribute.

    The attribute is present when running inside a py2exe-generated
    executable.

    @return: True if sys has a "frozen" attribute, else False
    @rtype: bool
    """
    # A private sentinel distinguishes "attribute missing" from any
    # value the attribute could legitimately have (including None).
    missing = object()
    return getattr(sys, "frozen", missing) is not missing
def get_modules(folder):
    """Find all valid modules in the given folder, which must be in
    the same directory as this loader.py module. A valid module
    has a .py extension, does not start with an underscore, and is
    importable.

    Modules that fail to import are reported on stderr and skipped.

    @param folder: name of the folder holding the modules
    @ptype folder: string
    @return: all loaded valid modules
    @rtype: iterator of module
    """
    if is_frozen():
        # find modules in library.zip filename
        zipname = os.path.dirname(os.path.dirname(__file__))
        parentmodule = __loader__.fullname.split('.', 1)[0]
        prefix = "%s/%s/" % (parentmodule.replace(".", "/"), folder)
        with zipfile.ZipFile(zipname, 'r') as f:
            entries = f.namelist()
        modnames = []
        for entry in entries:
            if not entry.startswith(prefix) or "__init__" in entry:
                continue
            modname = os.path.splitext(entry[len(prefix):])[0]
            # Skip the folder entry itself (empty name), files in
            # subfolders, private modules (same rule as
            # get_importable_modules) and names already seen, eg. when
            # the archive holds several compiled variants of one module.
            if not modname or "/" in modname or modname.startswith('_'):
                continue
            if modname not in modnames:
                modnames.append(modname)
    else:
        dirname = os.path.join(os.path.dirname(__file__), folder)
        modnames = get_importable_modules(dirname)
    for modname in modnames:
        # relative import, anchored at this module's name
        name = "..%s.%s" % (folder, modname)
        try:
            module = importlib.import_module(name, __name__)
        except ImportError as msg:
            # Report on stderr so the message does not end up in the
            # middle of regular program output on stdout.
            print("ERROR: could not load module %s: %s" % (modname, msg),
                  file=sys.stderr)
            continue
        yield module
def get_importable_modules(folder):
    """Find all module files in the given folder that end with '.py' and
    don't start with an underscore.

    The names are yielded in sorted order, so the result does not depend
    on the arbitrary order in which os.listdir() returns the entries.

    @param folder: the directory to scan
    @ptype folder: string
    @return: module names without the '.py' extension
    @rtype: iterator of string
    """
    for fname in sorted(os.listdir(folder)):
        if fname.endswith('.py') and not fname.startswith('_'):
            yield fname[:-3]
def get_plugins(modules, classobj):
    """Collect the plugin classes of all given modules.

    Every module is searched with get_module_plugins() and the classes
    it finds are yielded, module by module.

    @param modules: the modules to search
    @ptype modules: iterator of modules
    @param classobj: the class whose subclasses are searched for
    @ptype classobj: class object
    @return: found classes
    @rtype: iterator of class objects
    """
    for mod in modules:
        found = get_module_plugins(mod, classobj)
        for cls in found:
            yield cls
def get_module_plugins(module, classobj):
    """Yield every subclass of classobj found in the module.

    If the module defines __all__, only the names listed there are
    examined, otherwise all module names not starting with '_' are.
    Listed names that are missing from the module are skipped, and so
    are objects for which issubclass() raises a TypeError.
    """
    if hasattr(module, '__all__'):
        candidates = module.__all__
    else:
        candidates = [key for key in vars(module)
                      if not key.startswith('_')]
    # sentinel marking names that the module does not actually provide
    missing = object()
    for candidate in candidates:
        obj = getattr(module, candidate, missing)
        if obj is missing:
            continue
        try:
            matches = issubclass(obj, classobj)
        except TypeError:
            continue
        if matches:
            yield obj

View file

@ -135,7 +135,7 @@ class LogStatistics (object):
self.internal_errors += 1
class Logger (object):
class _Logger (object):
"""
Base class for logging of checked urls. It defines the public API
(see below) and offers basic functionality for all loggers.
@ -164,6 +164,12 @@ class Logger (object):
"""
__metaclass__ = abc.ABCMeta
# A lowercase name for this logger, usable for option values
LoggerName = None
# Default log configuration
LoggerArgs = {}
def __init__ (self, **args):
"""
Initialize a logger, looking for part restrictions in kwargs.
@ -192,6 +198,12 @@ class Logger (object):
# Flag to see if logger is active. Can be deactivated on errors.
self.is_active = True
def get_args(self, kwargs):
"""Construct log configuration from default and user args."""
args = dict(self.LoggerArgs)
args.update(kwargs)
return args
def get_charset_encoding (self):
"""Translate the output encoding to a charset encoding name."""
if self.output_encoding == "utf-8-sig":
@ -446,34 +458,13 @@ class Logger (object):
return modified.strftime("%Y-%m-%d{0}%H:%M:%S.%fZ".format(sep))
return u""
# the standard URL logger implementations
from .text import TextLogger
from .html import HtmlLogger
from .gml import GMLLogger
from .dot import DOTLogger
from .sql import SQLLogger
from .csvlog import CSVLogger
from .blacklist import BlacklistLogger
from .gxml import GraphXMLLogger
from .customxml import CustomXMLLogger
from .none import NoneLogger
from .sitemapxml import SitemapXmlLogger
def _get_loggers():
"""Return list of Logger classes."""
from .. import loader
modules = loader.get_modules('logger')
return list(loader.get_plugins(modules, _Logger))
# default URL logger classes
Loggers = {
"text": TextLogger,
"html": HtmlLogger,
"gml": GMLLogger,
"dot": DOTLogger,
"sql": SQLLogger,
"csv": CSVLogger,
"blacklist": BlacklistLogger,
"gxml": GraphXMLLogger,
"xml": CustomXMLLogger,
"sitemap": SitemapXmlLogger,
"none": NoneLogger,
}
# for easy printing: a comma separated logger list
LoggerKeys = ", ".join(repr(name) for name in Loggers)
LoggerClasses = _get_loggers()
LoggerNames = [x.LoggerName for x in LoggerClasses]
LoggerKeys = ", ".join(repr(x) for x in LoggerNames)

View file

@ -20,20 +20,25 @@ A blacklist logger.
import os
import codecs
from . import Logger
from . import _Logger
class BlacklistLogger (Logger):
class BlacklistLogger (_Logger):
"""
Updates a blacklist of wrong links. If a link on the blacklist
is working (again), it is removed from the list. So after n days
we have only links on the list which failed for n days.
"""
def __init__ (self, **args):
"""
Intialize with old blacklist data (if found, else not).
"""
LoggerName = "blacklist"
LoggerArgs = {
"filename": "~/.linkchecker/blacklist",
}
def __init__ (self, **kwargs):
"""Intialize with old blacklist data (if found, else not)."""
args = self.get_args(kwargs)
super(BlacklistLogger, self).__init__(**args)
self.init_fileoutput(args)
self.blacklist = {}

View file

@ -20,7 +20,7 @@ A CSV logger.
import csv
import os
import sys
from . import Logger
from . import _Logger
from .. import strformat
Columns = (
@ -30,14 +30,23 @@ Columns = (
)
class CSVLogger (Logger):
class CSVLogger (_Logger):
"""
CSV output, consisting of one line per entry. Entries are
separated by a separator (a semicolon per default).
"""
def __init__ (self, **args):
LoggerName = "csv"
LoggerArgs = {
"filename": "linkchecker-out.csv",
'separator': ';',
"quotechar": '"',
}
def __init__ (self, **kwargs):
"""Store default separator and (os dependent) line terminator."""
args = self.get_args(kwargs)
super(CSVLogger, self).__init__(**args)
# due to a limitation of the csv module, all output has to be
# utf-8 encoded

View file

@ -21,11 +21,17 @@ from . import xmllog
from .. import strformat
class CustomXMLLogger (xmllog.XMLLogger):
class CustomXMLLogger (xmllog._XMLLogger):
"""
XML custom output for easy post-processing.
"""
LoggerName = "xml"
LoggerArgs = {
"filename": "linkchecker-out.xml",
}
def start_output (self):
"""
Write start of checking info as xml comment.

View file

@ -18,14 +18,21 @@
A DOT graph format logger. The specification has been taken from
http://www.graphviz.org/doc/info/lang.html
"""
from .graph import GraphLogger
from .graph import _GraphLogger
class DOTLogger (GraphLogger):
class DOTLogger (_GraphLogger):
"""
Generates .dot sitemap graphs. Use graphviz to see the sitemap graph.
"""
LoggerName = "dot"
LoggerArgs = {
"filename": "linkchecker-out.dot",
"encoding": "ascii",
}
def start_output (self):
"""Write start of checking info as DOT comment."""
super(DOTLogger, self).start_output()

View file

@ -17,13 +17,19 @@
"""
A gml logger.
"""
from .graph import GraphLogger
from .graph import _GraphLogger
class GMLLogger (GraphLogger):
class GMLLogger (_GraphLogger):
"""GML means Graph Modeling Language. Use a GML tool to see
the sitemap graph."""
LoggerName = 'gml'
LoggerArgs = {
"filename": "linkchecker-out.gml",
}
def start_output (self):
"""Write start of checking info as gml comment."""
super(GMLLogger, self).start_output()

View file

@ -17,17 +17,18 @@
"""
Base class for graph loggers.
"""
from . import Logger
from . import _Logger
from ..decorators import notimplemented
import re
class GraphLogger (Logger):
class _GraphLogger (_Logger):
"""Provide base method to get node data."""
def __init__ (self, **args):
def __init__ (self, **kwargs):
"""Initialize graph node list and internal id counter."""
super(GraphLogger, self).__init__(**args)
args = self.get_args(kwargs)
super(_GraphLogger, self).__init__(**args)
self.init_fileoutput(args)
self.nodes = {}
self.nodeid = 0

View file

@ -18,16 +18,23 @@
A GraphXML logger.
"""
from .xmllog import XMLLogger
from .graph import GraphLogger
from .xmllog import _XMLLogger
from .graph import _GraphLogger
class GraphXMLLogger (XMLLogger, GraphLogger):
class GraphXMLLogger (_XMLLogger, _GraphLogger):
"""XML output mirroring the GML structure. Easy to parse with any XML
tool."""
def __init__ (self, **args):
LoggerName = 'gxml'
LoggerArgs = {
"filename": "linkchecker-out.gxml",
}
def __init__ (self, **kwargs):
"""Initialize graph node list and internal id counter."""
args = self.get_args(kwargs)
super(GraphXMLLogger, self).__init__(**args)
self.nodes = {}
self.nodeid = 0

View file

@ -20,7 +20,7 @@ A HTML logger.
import time
import cgi
import os
from . import Logger
from . import _Logger
from .. import strformat, configuration
@ -50,11 +50,25 @@ HTML_HEADER = """<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
"""
class HtmlLogger (Logger):
class HtmlLogger (_Logger):
"""Logger with HTML output."""
def __init__ (self, **args):
LoggerName = 'html'
LoggerArgs = {
"filename": "linkchecker-out.html",
'colorbackground': '#fff7e5',
'colorurl': '#dcd5cf',
'colorborder': '#000000',
'colorlink': '#191c83',
'colorwarning': '#e0954e',
'colorerror': '#db4930',
'colorok': '#3ba557',
}
def __init__ (self, **kwargs):
"""Initialize default HTML color values."""
args = self.get_args(kwargs)
super(HtmlLogger, self).__init__(**args)
self.init_fileoutput(args)
self.colorbackground = args['colorbackground']

View file

@ -17,14 +17,16 @@
"""
A dummy logger.
"""
from . import Logger
from . import _Logger
class NoneLogger (Logger):
class NoneLogger (_Logger):
"""
Dummy logger printing nothing.
"""
LoggerName = 'none'
def comment (self, s, **args):
"""
Do nothing.

View file

@ -33,12 +33,20 @@ ChangeFreqs = (
HTTP_SCHEMES = (u'http:', u'https:')
HTML_TYPES = ('text/html', "application/xhtml+xml")
class SitemapXmlLogger (xmllog.XMLLogger):
class SitemapXmlLogger (xmllog._XMLLogger):
"""Sitemap XML output according to http://www.sitemaps.org/protocol.html
"""
def __init__ (self, **args):
LoggerName = 'sitemap'
LoggerArgs = {
"filename": "linkchecker-out.sitemap.xml",
"encoding": "utf-8",
}
def __init__ (self, **kwargs):
"""Initialize graph node list and internal id counter."""
args = self.get_args(kwargs)
super(SitemapXmlLogger, self).__init__(**args)
# All URLs must have the given prefix, which is determined
# by the first logged URL.

View file

@ -19,7 +19,7 @@ A SQL logger.
"""
import os
from . import Logger
from . import _Logger
from .. import url as urlutil
@ -46,15 +46,22 @@ def intify (s):
return 0
class SQLLogger (Logger):
class SQLLogger (_Logger):
"""
SQL output, should work with any SQL database (not tested).
"""
def __init__ (self, **args):
"""
Initialize database access data.
"""
LoggerName = 'sql'
LoggerArgs = {
"filename": "linkchecker-out.sql",
'separator': ';',
'dbname': 'linksdb',
}
def __init__ (self, **kwargs):
"""Initialize database access data."""
args = self.get_args(kwargs)
super(SQLLogger, self).__init__(**args)
self.init_fileoutput(args)
self.dbname = args['dbname']

View file

@ -18,11 +18,11 @@
The default text logger.
"""
import time
from . import Logger
from . import _Logger
from .. import ansicolor, strformat, configuration, i18n
class TextLogger (Logger):
class TextLogger (_Logger):
"""
A text logger, colorizing the output if possible.
@ -34,10 +34,27 @@ class TextLogger (Logger):
Unknown keywords will be ignored.
"""
def __init__ (self, **args):
"""
Initialize error counter and optional file output.
"""
LoggerName = 'text'
LoggerArgs = {
"filename": "linkchecker-out.txt",
'colorparent': "default",
'colorurl': "default",
'colorname': "default",
'colorreal': "cyan",
'colorbase': "purple",
'colorvalid': "bold;green",
'colorinvalid': "bold;red",
'colorinfo': "default",
'colorwarning': "bold;yellow",
'colordltime': "default",
'colordlsize': "default",
'colorreset': "default",
}
def __init__ (self, **kwargs):
"""Initialize error counter and optional file output."""
args = self.get_args(kwargs)
super(TextLogger, self).__init__(**args)
self.output_encoding = args.get("encoding", i18n.default_encoding)
self.init_fileoutput(args)

View file

@ -19,7 +19,7 @@ Base class for XML loggers.
"""
import xml.sax.saxutils
from . import Logger
from . import _Logger
xmlattr_entities = {
@ -44,14 +44,13 @@ def xmlquoteattr (s):
return xml.sax.saxutils.escape(s, xmlattr_entities)
class XMLLogger (Logger):
class _XMLLogger (_Logger):
"""Base class for XML output; easy to parse with any XML tool."""
def __init__ (self, **args):
"""
Initialize graph node list and internal id counter.
"""
super(XMLLogger, self).__init__(**args)
def __init__ (self, **kwargs):
""" Initialize graph node list and internal id counter. """
args = self.get_args(kwargs)
super(_XMLLogger, self).__init__(**args)
self.init_fileoutput(args)
self.indent = u" "
self.level = 0

View file

@ -478,7 +478,7 @@ if options.output:
else:
logtype, encoding = options.output, i18n.default_encoding
logtype = logtype.lower()
if logtype not in linkcheck.logger.Loggers:
if logtype not in linkcheck.logger.LoggerNames:
print_usage(
_("Unknown logger type %(type)r in %(output)r for option %(option)s") % \
{"type": logtype, "output": options.output, "option": "'-o, --output'"})
@ -510,7 +510,7 @@ if options.fileoutput:
ns['filename'] = suffix
else:
ns['filename'] = suffix
if ftype not in linkcheck.logger.Loggers:
if ftype not in linkcheck.logger.LoggerNames:
print_usage(
_("Unknown logger type %(type)r in %(output)r for option %(option)s") % \
{"type": ftype, "output": options.fileoutput,

View file

@ -32,19 +32,22 @@ from .. import get_file
get_url_from = linkcheck.checker.get_url_from
class TestLogger (linkcheck.logger.Logger):
class TestLogger (linkcheck.logger._Logger):
"""
Output logger for automatic regression tests.
"""
LoggerName = 'test'
def __init__ (self, **kwargs):
"""
The kwargs must have "expected" keyword with the expected logger
output lines.
"""
super(TestLogger, self).__init__(**kwargs)
args = self.get_args(kwargs)
super(TestLogger, self).__init__(**args)
# list of expected output lines
self.expected = kwargs['expected']
self.expected = args['expected']
# list of real output lines
self.result = []
# diff between expected and real output
@ -108,7 +111,7 @@ def add_fileoutput_config (config):
devnull = 'NUL'
else:
return
for ftype in linkcheck.logger.Loggers.keys():
for ftype in linkcheck.logger.LoggerNames:
if ftype in ('test', 'blacklist'):
continue
logger = config.logger_new(ftype, fileoutput=1, filename=devnull)
@ -118,9 +121,9 @@ def add_fileoutput_config (config):
def get_test_aggregate (confargs, logargs):
"""Initialize a test configuration object."""
config = linkcheck.configuration.Configuration()
config.logger_add('test', TestLogger)
config.logger_add(TestLogger)
config['recursionlevel'] = 1
config['logger'] = config.logger_new('test', **logargs)
config['logger'] = config.logger_new(TestLogger.LoggerName, **logargs)
add_fileoutput_config(config)
# uncomment for debugging
#config.init_logging(None, debug=["all"])

View file

@ -78,7 +78,7 @@ class TestConfig (unittest.TestCase):
# output section
self.assertTrue(linkcheck.log.is_debug(linkcheck.LOG_THREAD))
self.assertFalse(config["status"])
self.assertTrue(isinstance(config["logger"], linkcheck.logger.Loggers["xml"]))
self.assertTrue(isinstance(config["logger"], linkcheck.logger.customxml.CustomXMLLogger))
self.assertTrue(config["verbose"])
self.assertTrue(config["complete"])
self.assertTrue(config["warnings"])