diff --git a/ChangeLog b/ChangeLog index 7de8952b..58cd3575 100644 --- a/ChangeLog +++ b/ChangeLog @@ -8,6 +8,22 @@ Type: bugfix Changed: linkcheck/logger/*.py + * All loggers now have an output encoding. Valid encodings are listed + in http://docs.python.org/lib/node127.html. The default encoding is + "iso-8859-15". + Type: feature + Changed: linkcheck/logger/*.py + + * The --output and --file-output parameters can specify the encoding + now. The documentation has been updated with this change. + Type: feature + Changed: linkchecker, linkchecker.1 + + * The encoding can also be specified in the linkcheckerrc config file. + Type: feature + Changed: config/linkcheckerrc + + 1.13.5 "Die Musterknaben" (released 22.9.2004) * Use xgettext with Python support for .pot file creation, adjusted developer documentation. diff --git a/TODO b/TODO index 39fd02d7..70f7895c 100644 --- a/TODO +++ b/TODO @@ -3,9 +3,7 @@ Next releases: - logger methods should get called with unicode strings (using the unicode-output from the html parser) -- specify output encoding for loggers - -- standardize fileoutput init method for loggers +- use output encoding for loggers - rethink intern/extern stuff diff --git a/config/linkcheckerrc b/config/linkcheckerrc index 06bd4d5d..d4d4ce10 100644 --- a/config/linkcheckerrc +++ b/config/linkcheckerrc @@ -63,6 +63,9 @@ [gml] #filename=linkchecker-out.gml #fields=all +# valid encodings are listed in http://docs.python.org/lib/node127.html +# default encoding is iso-8859-15 +#encoding=utf_16 # CSV logger [csv] @@ -94,6 +97,10 @@ [blacklist] #filename=~/.blacklist +# xml logger +[xml] +#encoding=iso-8859-1 + # checking options [checking] # number of threads diff --git a/linkcheck/logger/__init__.py b/linkcheck/logger/__init__.py index 399f10cc..c11f604c 100644 --- a/linkcheck/logger/__init__.py +++ b/linkcheck/logger/__init__.py @@ -42,21 +42,22 @@ Fields = { class Logger (object): """basic logger class enabling logging of checked urls""" - def __init__ 
(self, **kwargs): + def __init__ (self, **args): """initialize a logger, looking for field restrictions in kwargs""" # what log fields should be in output self.logfields = None # log all fields + if args.has_key('fields'): + if "all" not in args['fields']: + # only log given fields + self.logfields = args['fields'] # number of spaces before log fields for alignment self.logspaces = {} # maximum indent of spaces for alignment self.max_indent = 0 # number of encountered errors self.errors = 0 - # encoding of output strings - if kwargs.has_key('fields'): - if "all" not in kwargs['fields']: - # only log given fields - self.logfields = kwargs['fields'] + # encoding of output + self.output_encoding = args.get("encoding", "iso-8859-1") def init_fileoutput (self, args): """initialize self.fd file descriptor from args""" diff --git a/linkchecker b/linkchecker index dfa9aca0..ca836fd7 100755 --- a/linkchecker +++ b/linkchecker @@ -258,15 +258,20 @@ group.add_option("-q", "--quiet", action="store_true", dest="quiet", help=_( """Quiet operation. This is only useful with -F.""")) group.add_option("-o", "--output", type="string", dest="output", + metavar="TYPE[/ENCODING]", help=_( -"""Specify output as %(loggertypes)s. Default output type is text.""") % \ +"""Specify output as %(loggertypes)s. Default output type is text. +ENCODING specifies the output encoding, the default is "iso-8859-15". +Valid encodings are listed at http://docs.python.org/lib/node127.html.""") % \ {'loggertypes': linkcheck.LoggerKeys}) group.add_option("-F", "--file-output", type="string", action="append", - dest="fileoutput", metavar="TYPE[/FILENAME]", + dest="fileoutput", metavar="TYPE[/ENCODING][/FILENAME]", help=_( """Output to a file linkchecker-out.TYPE, $HOME/.linkchecker_blacklist for 'blacklist' output, or FILENAME if specified. -The FILENAME part of the 'none' output type will be ignored, +ENCODING specifies the output encoding, the default is "iso-8859-15". 
+Valid encodings are listed at http://docs.python.org/lib/node127.html. +The FILENAME and ENCODING parts of the 'none' output type will be ignored, else if the file already exists, it will be overwritten. You can specify this option more than once. Valid file output TYPEs are %(loggertypes)s. You can specify this option multiple times to output diff --git a/linkchecker.1 b/linkchecker.1 index 5d88fa88..a9a63d0d 100644 --- a/linkchecker.1 +++ b/linkchecker.1 @@ -100,15 +100,23 @@ This option implies \fB-w\fP. \fB-q\fP, \fB--quiet\fP Quiet operation. This is only useful with \fB-F\fP. .TP -\fB-o\fP\fItype\fP, \fB--output=\fP\fItype\fP +\fB-o\fP\fItype\fP, \fB--output=\fP\fItype\fP[\fB/\fP\fIencoding\fP] Specify output type as \fBtext\fP, \fBcolored\fP, \fBhtml\fP, \fBsql\fP, \fBcsv\fP, \fBgml\fP, \fBxml\fP, \fBnone\fP or \fBblacklist\fP. Default type is \fBtext\fP. +\fIencoding\fP specifies the output encoding, the default is +\fBiso-8859-15\fP. +Valid encodings are listed at +\fBhttp://docs.python.org/lib/node127.html\fP. .TP -\fB-F\fP\fItype\fP[\fB/\fP\fIfilename\fP], \fB--file-output=\fP\fItype\fP[\fB/\fP\fIfilename\fP] +\fB-F\fP\fItype\fP[\fB/\fP\fIencoding\fP][\fB/\fP\fIfilename\fP], \fB--file-output=\fP\fItype\fP[\fB/\fP\fIencoding\fP][\fB/\fP\fIfilename\fP] Output to a file \fBlinkchecker-out.\fP\fItype\fP, \fB$HOME/.linkchecker_blacklist\fP for \fBblacklist\fP output, or \fIfilename\fP if specified. +\fIencoding\fP specifies the output encoding, the default is +\fBiso-8859-15\fP. +Valid encodings are listed at +\fBhttp://docs.python.org/lib/node127.html\fP. The \fIfilename\fP part of the \fBnone\fP output type will be ignored, else if the file already exists, it will be overwritten. You can specify this option more than once. Valid file output types