linkchecker/linkcheck/logger/__init__.py

471 lines
15 KiB
Python
Raw Normal View History

2014-01-08 21:33:04 +00:00
# Copyright (C) 2000-2014 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
2009-07-24 21:58:20 +00:00
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
Output logging support for different formats.
"""
import sys
import os
import datetime
2010-11-01 08:58:03 +00:00
import time
import codecs
2012-06-19 21:27:26 +00:00
import abc
2020-05-19 18:56:42 +00:00
from .. import log, LOG_CHECK, strformat, dummy, configuration, i18n
Fix remaining flake8 violations in linkcheck/ linkcheck/better_exchook2.py:28:89: E501 line too long (90 > 88 characters) linkcheck/better_exchook2.py:155:9: E722 do not use bare 'except' linkcheck/better_exchook2.py:166:9: E722 do not use bare 'except' linkcheck/better_exchook2.py:289:13: E741 ambiguous variable name 'l' linkcheck/better_exchook2.py:299:9: E722 do not use bare 'except' linkcheck/containers.py:48:13: E731 do not assign a lambda expression, use a def linkcheck/ftpparse.py:123:89: E501 line too long (93 > 88 characters) linkcheck/loader.py:46:47: E203 whitespace before ':' linkcheck/logconf.py:45:29: E231 missing whitespace after ',' linkcheck/robotparser2.py:157:89: E501 line too long (95 > 88 characters) linkcheck/robotparser2.py:182:89: E501 line too long (89 > 88 characters) linkcheck/strformat.py:181:16: E203 whitespace before ':' linkcheck/strformat.py:181:43: E203 whitespace before ':' linkcheck/strformat.py:253:9: E731 do not assign a lambda expression, use a def linkcheck/strformat.py:254:9: E731 do not assign a lambda expression, use a def linkcheck/strformat.py:341:89: E501 line too long (111 > 88 characters) linkcheck/url.py:102:32: E203 whitespace before ':' linkcheck/url.py:277:5: E741 ambiguous variable name 'l' linkcheck/url.py:402:5: E741 ambiguous variable name 'l' linkcheck/checker/__init__.py:203:1: E402 module level import not at top of file linkcheck/checker/fileurl.py:200:89: E501 line too long (103 > 88 characters) linkcheck/checker/mailtourl.py:122:60: E203 whitespace before ':' linkcheck/checker/mailtourl.py:157:89: E501 line too long (96 > 88 characters) linkcheck/checker/mailtourl.py:190:89: E501 line too long (109 > 88 characters) linkcheck/checker/mailtourl.py:200:89: E501 line too long (111 > 88 characters) linkcheck/checker/mailtourl.py:249:89: E501 line too long (106 > 88 characters) linkcheck/checker/unknownurl.py:226:23: W291 trailing whitespace linkcheck/checker/urlbase.py:245:89: E501 line too long (101 > 88 
characters) linkcheck/configuration/confparse.py:236:89: E501 line too long (186 > 88 characters) linkcheck/configuration/confparse.py:247:89: E501 line too long (111 > 88 characters) linkcheck/configuration/__init__.py:164:9: E266 too many leading '#' for block comment linkcheck/configuration/__init__.py:184:9: E266 too many leading '#' for block comment linkcheck/configuration/__init__.py:190:9: E266 too many leading '#' for block comment linkcheck/configuration/__init__.py:195:9: E266 too many leading '#' for block comment linkcheck/configuration/__init__.py:198:9: E266 too many leading '#' for block comment linkcheck/configuration/__init__.py:435:89: E501 line too long (90 > 88 characters) linkcheck/director/aggregator.py:45:43: E231 missing whitespace after ',' linkcheck/director/aggregator.py:178:89: E501 line too long (106 > 88 characters) linkcheck/logger/__init__.py:29:1: E731 do not assign a lambda expression, use a def linkcheck/logger/__init__.py:108:13: E741 ambiguous variable name 'l' linkcheck/logger/__init__.py:275:19: F821 undefined name '_' linkcheck/logger/__init__.py:342:16: F821 undefined name '_' linkcheck/logger/__init__.py:380:13: F821 undefined name '_' linkcheck/logger/__init__.py:384:13: F821 undefined name '_' linkcheck/logger/__init__.py:387:13: F821 undefined name '_' linkcheck/logger/__init__.py:396:13: F821 undefined name '_' linkcheck/network/__init__.py:1:1: W391 blank line at end of file linkcheck/plugins/locationinfo.py:89:9: E731 do not assign a lambda expression, use a def linkcheck/plugins/locationinfo.py:91:9: E731 do not assign a lambda expression, use a def linkcheck/plugins/markdowncheck.py:112:89: E501 line too long (111 > 88 characters) linkcheck/plugins/markdowncheck.py:141:9: E741 ambiguous variable name 'l' linkcheck/plugins/markdowncheck.py:165:23: E203 whitespace before ':' linkcheck/plugins/viruscheck.py:95:42: E203 whitespace before ':'
2020-05-30 16:01:36 +00:00
# Temporary identity stand-in for _(): the labels below are stored
# unchanged and are only wrapped in _() calls here (presumably so that
# message extraction tools pick them up -- confirm against the i18n setup).
def _(message):
    return message


# Maps log part names to their display labels.
Fields = {
    "realurl": _("Real URL"),
    "cachekey": _("Cache key"),
    "result": _("Result"),
    "base": _("Base"),
    "name": _("Name"),
    "parenturl": _("Parent URL"),
    "extern": _("Extern"),
    "info": _("Info"),
    "warning": _("Warning"),
    "dltime": _("D/L time"),
    "dlsize": _("Size"),
    "checktime": _("Check time"),
    "url": _("URL"),
    "level": _("Level"),
    "modified": _("Modified"),
}
# Remove the stand-in again so it does not stay bound in this module.
del _

# Zeroed counter template with one slot per content category.
ContentTypes = {
    "image": 0,
    "text": 0,
    "video": 0,
    "audio": 0,
    "application": 0,
    "mail": 0,
    "other": 0,
}
2010-03-13 07:47:12 +00:00
class LogStatistics:
    """Gather log statistics:
    - number of errors, warnings and valid links
    - type of contents (image, video, audio, text, ...)
    - URL lengths
    """

    def __init__(self):
        """Initialize log statistics."""
        self.reset()

    def reset(self):
        """Reset all log statistics to default values."""
        # number of logged URLs
        self.number = 0
        # number of encountered URL errors
        self.errors = 0
        # number of URL errors that were printed
        self.errors_printed = 0
        # number of URL warnings
        self.warnings = 0
        # number of URL warnings that were printed
        self.warnings_printed = 0
        # number of internal errors
        self.internal_errors = 0
        # link types; a private copy so the module-level template stays zeroed
        self.link_types = ContentTypes.copy()
        # URL length statistics
        self.max_url_length = 0
        # 0 doubles as "no URL seen yet", see log_url()
        self.min_url_length = 0
        self.avg_url_length = 0.0
        self.avg_number = 0
        # overall downloaded bytes
        self.downloaded_bytes = None

    def log_url(self, url_data, do_print):
        """Log URL statistics.

        @param url_data: checked URL; its valid, warnings, content_type
            and url attributes are read
        @param do_print: if true, the errors and warnings of this URL are
            also added to the "printed" counters
        """
        self.number += 1
        if not url_data.valid:
            self.errors += 1
            if do_print:
                self.errors_printed += 1
        num_warnings = len(url_data.warnings)
        self.warnings += num_warnings
        if do_print:
            self.warnings_printed += num_warnings
        url = url_data.url
        if url_data.content_type:
            # classify by the major MIME type, e.g. "text" for "text/html"
            key = url_data.content_type.split('/', 1)[0].lower()
            if key not in self.link_types:
                key = "other"
        elif url and url.startswith("mailto:"):
            # guard on url first: a missing URL must not raise here, since
            # the length statistics below treat the URL as optional too
            key = "mail"
        else:
            key = "other"
        self.link_types[key] += 1
        if url:
            n = len(url)
            self.max_url_length = max(n, self.max_url_length)
            if self.min_url_length == 0:
                # first non-empty URL initializes the minimum
                self.min_url_length = n
            else:
                self.min_url_length = min(n, self.min_url_length)
            # track average number separately since empty URLs do not count
            self.avg_number += 1
            # calculate running average
            self.avg_url_length += (n - self.avg_url_length) / self.avg_number

    def log_internal_error(self):
        """Increase internal error count."""
        self.internal_errors += 1
class _Logger(abc.ABC):
    """
    Base class for logging of checked urls. It defines the public API
    (see below) and offers basic functionality for all loggers.

    Each logger offers the following functions:

    * start_output()
      Initialize and start log output. Most loggers print a comment
      with copyright information.
    * end_output(**kwargs)
      Finish log output, possibly flushing buffers. Most loggers also
      print some statistics.
      Custom keyword arguments can be given for different loggers.
    * log_filter_url(url_data, do_print)
      Log a checked URL. The url_data object is a transport form of
      the UrlData class. The do_print flag indicates if this URL
      should be logged or just used to update internal statistics.

    Each subclassed logger must implement the following functions:

    * start_output()
      Also call the base class implementation of this.
    * end_output(**kwargs)
      See above.
    * log_url(url_data)
      Log a checked URL. Called by log_filter_url if do_print is True.
    """

    # A lowercase name for this logger, usable for option values
    LoggerName = None

    # Default log configuration
    LoggerArgs = {}

    def __init__(self, **args):
        """
        Initialize a logger, looking for part restrictions in kwargs.

        Recognized keys: 'parts' (names of the parts to log; logging is
        unrestricted if it is absent or contains "all") and 'encoding'
        (output encoding name).
        """
        if 'parts' in args and "all" not in args['parts']:
            # only log given parts
            self.logparts = args['parts']
        else:
            # log all parts
            self.logparts = None
        # number of spaces before log parts for alignment
        self.logspaces = {}
        # maximum indent of spaces for alignment
        self.max_indent = 0
        # log statistics
        self.stats = LogStatistics()
        # encoding of output
        encoding = args.get("encoding", i18n.default_encoding)
        try:
            # normalize to the codec's canonical name
            encoding = codecs.lookup(encoding).name
        except LookupError:
            # unknown encoding name: fall back to the default
            encoding = i18n.default_encoding
        self.output_encoding = encoding
        # how to handle codec errors
        self.codec_errors = "replace"
        # Flag to see if logger is active. Can be deactivated on errors.
        self.is_active = True

    def get_args(self, kwargs):
        """Construct log configuration from default and user args.

        @param kwargs: user supplied configuration; overrides LoggerArgs
        @return: new dict, LoggerArgs itself is not modified
        """
        args = dict(self.LoggerArgs)
        args.update(kwargs)
        return args

    def get_charset_encoding(self):
        """Translate the output encoding to a charset encoding name."""
        if self.output_encoding == "utf-8-sig":
            return "utf-8"
        return self.output_encoding

    def encode(self, s):
        """Encode string with output encoding."""
        assert isinstance(s, str)
        return s.encode(self.output_encoding, self.codec_errors)

    def init_fileoutput(self, args):
        """
        Initialize self.fd file descriptor from args. For file output
        (used when the fileoutput arg is given), the self.fd
        initialization is deferred until the first self.write() call.
        This avoids creation of an empty file when no output is written.

        Without the fileoutput arg, self.fd is taken from the fd arg if
        present, else created with create_fd().
        """
        self.filename = None
        self.close_fd = False
        self.fd = None
        if args.get('fileoutput'):
            self.filename = os.path.expanduser(args['filename'])
        elif 'fd' in args:
            self.fd = args['fd']
        else:
            self.fd = self.create_fd()

    def start_fileoutput(self):
        """Start output to configured file.

        Missing parent directories are created. If the file cannot be
        opened, a warning is logged, output goes to a dummy object and
        the logger is marked inactive.
        """
        path = os.path.dirname(self.filename)
        try:
            if path and not os.path.isdir(path):
                os.makedirs(path)
            self.fd = self.create_fd()
            self.close_fd = True
        except OSError as err:
            log.warn(
                LOG_CHECK,
                "Could not open file %r for writing: %s\n"
                "Disabling log output of %s",
                self.filename,
                err,
                self,
            )
            self.fd = dummy.Dummy()
            self.is_active = False
        # cleared in both cases so write() attempts the open only once
        self.filename = None

    def create_fd(self):
        """Create open file descriptor.

        Without a filename an encoded writer from i18n is returned, else
        the file is opened for writing with the output encoding.
        """
        if self.filename is None:
            return i18n.get_encoded_writer(
                encoding=self.output_encoding, errors=self.codec_errors
            )
        return codecs.open(self.filename, "wb", self.output_encoding, self.codec_errors)

    def close_fileoutput(self):
        """
        Flush and close the file output denoted by self.fd.
        """
        if self.fd is not None:
            try:
                self.flush()
            except OSError:
                # ignore flush errors
                pass
            # only close descriptors opened by start_fileoutput()
            if self.close_fd:
                try:
                    self.fd.close()
                except OSError:
                    # ignore close errors
                    pass
            self.fd = None

    def check_date(self):
        """
        Check for special dates.
        """
        now = datetime.date.today()
        if now.day == 7 and now.month == 1:
            msg = _("Happy birthday for LinkChecker, I'm %d years old today!")
            self.comment(msg % (now.year - 2000))

    def comment(self, s, **args):
        """
        Write a comment and a newline. This method just prints
        the given string.
        """
        self.writeln(s=s, **args)

    def wrap(self, lines, width):
        """
        Return wrapped version of given lines.

        The lines are joined with an empty line between them and wrapped
        with an indent of self.max_indent spaces; the leading whitespace
        of the result is stripped.
        """
        sep = os.linesep + os.linesep
        text = sep.join(lines)
        kwargs = dict(
            subsequent_indent=" " * self.max_indent,
            initial_indent=" " * self.max_indent,
            break_long_words=False,
            break_on_hyphens=False,
        )
        return strformat.wrap(text, width, **kwargs).lstrip()

    def write(self, s, **args):
        """Write string to output descriptor.

        A pending output file is opened on the first call. If writing
        fails with OSError, a warning is logged, the output is closed and
        replaced by a dummy object, and the logger is marked inactive.
        """
        if self.filename is not None:
            self.start_fileoutput()
        if self.fd is None:
            # Happens when aborting threads times out
            log.warn(LOG_CHECK, "writing to uninitialized or closed file")
        else:
            try:
                self.fd.write(s, **args)
            except OSError as err:
                log.warn(
                    LOG_CHECK,
                    "Could not write to output file: %s\n"
                    "Disabling log output of %s",
                    err,
                    self,
                )
                self.close_fileoutput()
                self.fd = dummy.Dummy()
                self.is_active = False

    def writeln(self, s="", **args):
        """
        Write string to output descriptor plus a newline.
        """
        self.write(f"{s}{os.linesep}", **args)

    def has_part(self, name):
        """
        See if given part name will be logged.
        """
        if self.logparts is None:
            # log all parts
            return True
        return name in self.logparts

    def part(self, name):
        """
        Return translated part name.
        """
        return _(Fields.get(name, ""))

    def spaces(self, name):
        """
        Return indent of spaces for given part name.
        """
        return self.logspaces[name]

    def start_output(self):
        """
        Start log output.

        Computes the alignment spaces for all logged parts, resets the
        statistics and records the start time.
        """
        # map with spaces between part name and value
        if self.logparts is None:
            parts = Fields.keys()
        else:
            parts = self.logparts
        # translate every part name only once
        labels = {key: self.part(key) for key in parts}
        # maximum indent for localized log part names; default=0 keeps an
        # empty part list from raising ValueError in max()
        widest = max((len(label) for label in labels.values()), default=0)
        self.max_indent = widest + 1
        for key, label in labels.items():
            numspaces = self.max_indent - len(label)
            self.logspaces[key] = " " * numspaces
        self.stats.reset()
        self.starttime = time.time()

    def log_filter_url(self, url_data, do_print):
        """
        Log a new url with this logger if do_print is True. Else
        only update accounting data.
        """
        self.stats.log_url(url_data, do_print)
        if do_print:
            self.log_url(url_data)

    def write_intro(self):
        """Write intro comments."""
        self.comment(
            _("created by %(app)s at %(time)s")
            % {"app": configuration.AppName, "time": strformat.strtime(self.starttime)}
        )
        self.comment(
            _("Read the documentation at %(url)s") % {'url': configuration.Url}
        )
        self.comment(
            _("Write comments and bugs to %(url)s") % {'url': configuration.SupportUrl}
        )
        self.check_date()

    def write_outro(self):
        """Write outro comments."""
        self.stoptime = time.time()
        duration = self.stoptime - self.starttime
        self.comment(
            _("Stopped checking at %(time)s (%(duration)s)")
            % {
                "time": strformat.strtime(self.stoptime),
                "duration": strformat.strduration_long(duration),
            }
        )

    @abc.abstractmethod
    def log_url(self, url_data):
        """
        Log a new url with this logger.
        """
        pass

    @abc.abstractmethod
    def end_output(self, **kwargs):
        """
        End of output, used for cleanup (eg output buffer flushing).
        """
        pass

    def __str__(self):
        """
        Return class name.
        """
        return self.__class__.__name__

    def __repr__(self):
        """
        Return quoted class name.
        """
        return repr(self.__class__.__name__)

    def flush(self):
        """
        If the logger has internal buffers, flush them.
        Ignore flush I/O errors since we are not responsible for proper
        flushing of log output streams.
        """
        if hasattr(self, "fd"):
            try:
                self.fd.flush()
            except (OSError, AttributeError):
                # AttributeError covers self.fd being None or lacking flush()
                pass

    def log_internal_error(self):
        """Indicate that an internal error occurred in the program."""
        log.warn(LOG_CHECK, "internal error occurred")
        self.stats.log_internal_error()

    def format_modified(self, modified, sep=" "):
        """Format modification date in UTC if it's not None.

        @param modified: modification date in UTC
        @type modified: datetime or None
        @param sep: separator placed between the date and the time part
        @type sep: str
        @return: formatted date or empty string
        @rtype: unicode
        """
        if modified is not None:
            return modified.strftime(f"%Y-%m-%d{sep}%H:%M:%S.%fZ")
        return ""
2012-09-19 09:05:26 +00:00
2020-05-30 16:01:36 +00:00
2013-12-11 17:41:55 +00:00
def _get_loggers():
    """Collect the _Logger plugin classes found in this package.

    @return: list of the classes that loader.get_plugins() yields for the
        modules of the 'logger' package
    """
    # imported here rather than at module level, as in the original
    from .. import loader

    logger_modules = loader.get_package_modules('logger', __path__)
    plugin_classes = loader.get_plugins(logger_modules, [_Logger])
    return list(plugin_classes)
2013-12-11 17:41:55 +00:00
2013-12-11 17:41:55 +00:00
# All logger plugin classes found at import time.
LoggerClasses = _get_loggers()
# The LoggerName of each class, in the same order.
LoggerNames = [logger_class.LoggerName for logger_class in LoggerClasses]
# Comma separated, repr()-quoted logger names as one string.
LoggerKeys = ", ".join(map(repr, LoggerNames))