Introduced transport object API for logging.

This commit is contained in:
Bastian Kleineidam 2009-03-06 19:30:58 +01:00
parent 58925b21d3
commit ba160350dd
3 changed files with 55 additions and 2 deletions

View file

@ -950,6 +950,42 @@ class UrlBase (object):
"""
return u"<%s >" % self.serialized()
def to_wire (self):
    """Build the reduced transport object that is handed to the loggers.

    XXX: no dedicated transport object exists yet, so for the time
    being this instance itself is returned.

    Whatever gets returned has to provide the attributes listed below
    (written here without the ``url_data.`` prefix under which the
    receiving side accesses them):

    - valid: bool
        True if the URL is valid.
    - cached: bool
        True if the URL data has been loaded from cache.
    - result: unicode
        Result string.
    - warnings: list of unicode
        Warnings for this URL.
    - name: unicode or None
        Name of the URL (eg. filename or link name).
    - parent_url: unicode or None
        Parent URL.
    - base_ref: unicode or None
        HTML base reference URL of the parent.
    - url: unicode or None
        Fully qualified URL.
    - checktime: int
        Seconds needed to check this link, default: zero.
    - dltime: int
        Seconds needed to download the URL content, default: -1.
    - dlsize: int
        Size of the downloaded URL content, default: -1.
    - info: list of unicode
        Additional information about this URL.
    - line: int
        Line number of this URL in the parent document, or -1.
    - column: int
        Column number of this URL in the parent document, or -1.
    """
    # Placeholder implementation, see the XXX note in the docstring.
    return self
def filter_tidy_errors (errors):
"""Filter certain errors from HTML tidy run."""

View file

@ -69,5 +69,8 @@ class Logger (object):
def log_url (self, url_data):
    """Send a checked URL to all configured loggers.

    Each logger in self.logs receives exactly one log_filter_url()
    call per URL, and it is given the transport object obtained from
    url_data.to_wire() instead of the url_data instance passed in here.

    @param url_data: the checked URL; has to offer a to_wire() method
        and is also what self.do_print() bases its decision on.
    """
    # The print decision is made on the object passed in by the caller.
    do_print = self.do_print(url_data)
    # Only send a transport object to the loggers, not the complete
    # object instance.
    transport = url_data.to_wire()
    for log in self.logs:
        # One call per logger: an additional call with url_data itself
        # would log (and count) every URL twice.
        log.log_filter_url(transport, do_print)

View file

@ -44,7 +44,21 @@ del _
class Logger (object):
"""
Basic logger class enabling logging of checked urls.
Base class for logging of checked urls. It defines the public API
(see below) and offers basic functionality for all loggers.
Each logger has to offer the following functions:
* start_output()
Initialize and start log output. Most loggers print a comment
with copyright information.
* end_output()
Finish log output, possibly flushing buffers. Most loggers also
print some statistics.
* log_filter_url(url_data, do_print)
Log a checked URL. The url_data object is a transport form of
the UrlData class. The do_print flag indicates if this URL
should be logged or just used to update internal statistics.
"""
def __init__ (self, **args):