2014-01-08 21:33:04 +00:00
|
|
|
# Copyright (C) 2000-2014 Bastian Kleineidam
|
2004-08-16 19:28:42 +00:00
|
|
|
#
|
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
# (at your option) any later version.
|
|
|
|
|
#
|
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
|
#
|
2009-07-24 21:58:20 +00:00
|
|
|
# You should have received a copy of the GNU General Public License along
|
|
|
|
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
2005-01-19 15:08:02 +00:00
|
|
|
"""
|
|
|
|
|
A CSV logger.
|
|
|
|
|
"""
|
2004-08-16 19:28:42 +00:00
|
|
|
import csv
|
2020-05-15 18:37:04 +00:00
|
|
|
from io import StringIO
|
2004-08-16 19:28:42 +00:00
|
|
|
import os
|
2020-05-15 18:37:04 +00:00
|
|
|
|
2013-12-11 17:41:55 +00:00
|
|
|
from . import _Logger
|
2004-08-16 19:28:42 +00:00
|
|
|
|
2012-09-18 10:12:00 +00:00
|
|
|
# Names of the CSV columns, in output order.  The logger iterates this
# tuple to build the header row, keeping only the names for which
# has_part() is true.
Columns = (
    "urlname", "parentname", "base", "result",
    "warningstring", "infostring", "valid", "url",
    "line", "column", "name",
    "dltime", "size", "checktime",
    "cached", "level", "modified",
)
|
|
|
|
|
|
2004-08-16 19:28:42 +00:00
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
class CSVLogger(_Logger):
    """
    CSV output, consisting of one line per entry. Entries are
    separated by a separator (a semicolon per default).

    start_output() writes a header row naming every enabled column of
    Columns; log_url() then writes one row per checked URL with exactly
    one cell per enabled column, in the same order as the header.
    """

    LoggerName = "csv"

    # Default values for the logger arguments.  __init__ reads the
    # 'separator', 'quotechar' and 'dialect' entries from the mapping
    # returned by get_args().
    LoggerArgs = {
        "filename": "linkchecker-out.csv",
        'separator': ';',
        "quotechar": '"',
        "dialect": "excel",
    }

    def __init__(self, **kwargs):
        """Store separator, quote character, CSV dialect and the
        (os dependent) line terminator.

        NOTE(review): get_args() is defined outside this class; it
        presumably merges kwargs with LoggerArgs -- confirm in _Logger.
        """
        args = self.get_args(kwargs)
        super().__init__(**args)
        self.init_fileoutput(args)
        self.separator = args['separator']
        self.quotechar = args['quotechar']
        self.dialect = args['dialect']
        self.linesep = os.linesep

    def comment(self, s, **args):
        """Write CSV comment: the text s prefixed with "# "."""
        self.writeln(s="# %s" % s, **args)

    def start_output(self):
        """Write checking start info as csv comment, then the header row.

        Also creates the in-memory queue and the csv writer that
        writerow() uses for all subsequent rows.
        """
        super().start_output()
        if self.has_part("intro"):
            self.write_intro()
            self.flush()
        else:
            # write empty string to initialize file output
            self.write("")
        # Rows are formatted into this buffer by the csv writer and then
        # forwarded to the real output by writerow().
        self.queue = StringIO()
        self.writer = csv.writer(
            self.queue,
            dialect=self.dialect,
            delimiter=self.separator,
            lineterminator=self.linesep,
            quotechar=self.quotechar,
        )
        # Header: the names of all enabled columns, in Columns order.
        row = [s for s in Columns if self.has_part(s)]
        if row:
            self.writerow(row)

    def log_url(self, url_data):
        """Write csv formatted url check info.

        One cell is appended for every enabled column, in the order of
        Columns, so that each row lines up with the header row written
        by start_output().
        """
        row = []
        if self.has_part("urlname"):
            row.append(url_data.base_url)
        if self.has_part("parentname"):
            row.append(url_data.parent_url)
        if self.has_part("base"):
            row.append(url_data.base_ref)
        if self.has_part("result"):
            row.append(url_data.result)
        if self.has_part("warningstring"):
            # each warning is indexed with [1] to get its text
            row.append(self.linesep.join(x[1] for x in url_data.warnings))
        if self.has_part("infostring"):
            row.append(self.linesep.join(url_data.info))
        if self.has_part("valid"):
            row.append(url_data.valid)
        if self.has_part("url"):
            row.append(url_data.url)
        # line and column may be None.  The cell is still emitted (the
        # csv module writes None as an empty field); skipping it would
        # shift all following cells under the wrong header.
        if self.has_part("line"):
            row.append(url_data.line)
        if self.has_part("column"):
            row.append(url_data.column)
        if self.has_part("name"):
            row.append(url_data.name)
        if self.has_part("dltime"):
            row.append(url_data.dltime)
        # The part name must match the "size" entry of Columns that
        # controls the header cell.
        if self.has_part("size"):
            row.append(url_data.size)
        if self.has_part("checktime"):
            row.append(url_data.checktime)
        if self.has_part("cached"):
            # the cached column is always written as 0
            row.append(0)
        if self.has_part("level"):
            row.append(url_data.level)
        if self.has_part("modified"):
            row.append(self.format_modified(url_data.modified))
        self.writerow(row)
        self.flush()

    def writerow(self, row):
        """Write one row in CSV format.

        The row is formatted into the in-memory queue by the csv writer,
        copied to the logger output with write(), and the queue is then
        emptied for the next row.
        """
        self.writer.writerow(row)
        # Fetch the formatted text from the queue (StringIO yields str,
        # so no decoding is needed) ...
        data = self.queue.getvalue()
        # ... and write to the target stream
        self.write(data)
        # empty queue
        self.queue.seek(0)
        self.queue.truncate(0)

    def end_output(self, **kwargs):
        """Write end of checking info as csv comment and close the
        file output.  Keyword arguments are accepted but not used here.
        """
        if self.has_part("outro"):
            self.write_outro()
        self.close_fileoutput()
|