mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-22 15:14:44 +00:00
unicode fixes
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1899 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
abfd6c475d
commit
de68f20f89
5 changed files with 126 additions and 175 deletions
|
|
@ -25,6 +25,7 @@ except ImportError:
|
|||
|
||||
import linkcheck.threader
|
||||
import linkcheck.log
|
||||
from urlbase import stderr
|
||||
|
||||
class Consumer (object):
|
||||
"""consume urls from the url queue in a threaded manner"""
|
||||
|
|
@ -129,9 +130,9 @@ class Consumer (object):
|
|||
links = self.linknumber
|
||||
tocheck = self.cache.incoming_len()
|
||||
duration = linkcheck.strformat.strduration(curtime - start_time)
|
||||
print >> sys.stderr, _("Status: %5d URLs queued, "\
|
||||
"%4d URLs checked, %2d active threads, runtime %s")\
|
||||
% (tocheck, links, active, duration)
|
||||
print >> stderr, _("Status: %5d URLs queued, "
|
||||
"%4d URLs checked, %2d active threads, runtime %s") % \
|
||||
(tocheck, links, active, duration)
|
||||
finally:
|
||||
self.lock.release()
|
||||
|
||||
|
|
|
|||
|
|
@ -33,7 +33,6 @@ class CSVLogger (linkcheck.logger.Logger):
|
|||
super(CSVLogger, self).__init__(**args)
|
||||
self.init_fileoutput(args)
|
||||
self.separator = args['separator']
|
||||
self.lineterminator = os.linesep
|
||||
|
||||
def start_output (self):
|
||||
"""print checking start info as csv comment"""
|
||||
|
|
@ -41,43 +40,42 @@ class CSVLogger (linkcheck.logger.Logger):
|
|||
if self.fd is None:
|
||||
return
|
||||
self.starttime = time.time()
|
||||
sep = os.linesep
|
||||
if self.has_field("intro"):
|
||||
self.fd.write("# "+(_("created by %s at %s") % \
|
||||
(linkcheck.configuration.AppName,
|
||||
linkcheck.strformat.strtime(self.starttime))))
|
||||
self.fd.write(self.lineterminator)
|
||||
self.fd.write("# "+(_("Get the newest version at %(url)s") % \
|
||||
{'url': linkcheck.configuration.Url}))
|
||||
self.fd.write(self.lineterminator)
|
||||
self.fd.write("# "+(_("Write comments and bugs to %(email)s") % \
|
||||
{'email': linkcheck.configuration.Email}))
|
||||
self.fd.write(self.lineterminator)
|
||||
self.fd.write(_("# Format of the entries:")+self.lineterminator+
|
||||
"# urlname;"+self.lineterminator+
|
||||
"# recursionlevel;"+self.lineterminator+
|
||||
"# parentname;"+self.lineterminator+
|
||||
"# baseref;"+self.lineterminator+
|
||||
"# result;"+self.lineterminator+
|
||||
"# warningstring;"+self.lineterminator+
|
||||
"# infostring;"+self.lineterminator+
|
||||
"# valid;"+self.lineterminator+
|
||||
"# url;"+self.lineterminator+
|
||||
"# line;"+self.lineterminator+
|
||||
"# column;"+self.lineterminator+
|
||||
"# name;"+self.lineterminator+
|
||||
"# dltime;"+self.lineterminator+
|
||||
"# dlsize;"+self.lineterminator+
|
||||
"# checktime;"+self.lineterminator+
|
||||
"# cached;"+self.lineterminator)
|
||||
self.writeln(u"# "+(_("created by %s at %s") % \
|
||||
(linkcheck.configuration.AppName,
|
||||
linkcheck.strformat.strtime(self.starttime))))
|
||||
self.writeln(u"# "+(_("Get the newest version at %(url)s") % \
|
||||
{'url': linkcheck.configuration.Url}))
|
||||
self.writeln(u"# "+(_("Write comments and bugs to %(email)s") % \
|
||||
{'email': linkcheck.configuration.Email}))
|
||||
self.writeln(_("# Format of the entries:")+sep+
|
||||
u"# urlname;"+sep+
|
||||
u"# recursionlevel;"+sep+
|
||||
u"# parentname;"+sep+
|
||||
u"# baseref;"+sep+
|
||||
u"# result;"+sep+
|
||||
u"# warningstring;"+sep+
|
||||
u"# infostring;"+sep+
|
||||
u"# valid;"+sep+
|
||||
u"# url;"+sep+
|
||||
u"# line;"+sep+
|
||||
u"# column;"+sep+
|
||||
u"# name;"+sep+
|
||||
u"# dltime;"+sep+
|
||||
u"# dlsize;"+sep+
|
||||
u"# checktime;"+sep+
|
||||
u"# cached;")
|
||||
self.flush()
|
||||
self.writer = csv.writer(self.fd, dialect='excel',
|
||||
delimiter=self.separator, lineterminator=self.lineterminator)
|
||||
delimiter=self.separator, lineterminator=sep)
|
||||
|
||||
def new_url (self, url_data):
|
||||
"""print csv formatted url check info"""
|
||||
if self.fd is None:
|
||||
return
|
||||
row = [url_data.base_url, url_data.recursion_level,
|
||||
row = []
|
||||
for s in [url_data.base_url, url_data.recursion_level,
|
||||
url_data.parent_url or "", url_data.base_ref or "",
|
||||
url_data.result,
|
||||
os.linesep.join(url_data.warning),
|
||||
|
|
@ -86,7 +84,11 @@ class CSVLogger (linkcheck.logger.Logger):
|
|||
url_data.line, url_data.column,
|
||||
url_data.name, url_data.dltime,
|
||||
url_data.dlsize, url_data.checktime,
|
||||
url_data.cached]
|
||||
url_data.cached]:
|
||||
if isinstance(s, unicode):
|
||||
row.append(s.encode(self.output_encoding, "ignore"))
|
||||
else:
|
||||
row.append(s)
|
||||
self.writer.writerow(row)
|
||||
self.flush()
|
||||
|
||||
|
|
@ -97,10 +99,9 @@ class CSVLogger (linkcheck.logger.Logger):
|
|||
self.stoptime = time.time()
|
||||
if self.has_field("outro"):
|
||||
duration = self.stoptime - self.starttime
|
||||
self.fd.write("# "+_("Stopped checking at %s (%s)%s")%\
|
||||
(linkcheck.strformat.strtime(self.stoptime),
|
||||
linkcheck.strformat.strduration(duration),
|
||||
self.lineterminator))
|
||||
self.writeln(u"# "+_("Stopped checking at %s (%s)") % \
|
||||
(linkcheck.strformat.strtime(self.stoptime),
|
||||
linkcheck.strformat.strduration(duration)))
|
||||
self.flush()
|
||||
if self.close_fd:
|
||||
self.fd.close()
|
||||
|
|
|
|||
|
|
@ -41,21 +41,16 @@ class GMLLogger (linkcheck.logger.Logger):
|
|||
return
|
||||
self.starttime = time.time()
|
||||
if self.has_field("intro"):
|
||||
self.fd.write("# "+(_("created by %s at %s") % \
|
||||
(linkcheck.configuration.AppName,
|
||||
linkcheck.strformat.strtime(self.starttime))))
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write("# "+(_("Get the newest version at %(url)s") %\
|
||||
{'url': linkcheck.configuration.Url}))
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write("# "+(_("Write comments and bugs to %(email)s") % \
|
||||
{'email': linkcheck.configuration.Email}))
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write("graph [")
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write(" directed 1")
|
||||
self.fd.write(os.linesep)
|
||||
self.writeln(u"# "+(_("created by %s at %s") % \
|
||||
(linkcheck.configuration.AppName,
|
||||
linkcheck.strformat.strtime(self.starttime))))
|
||||
self.writeln(u"# "+(_("Get the newest version at %(url)s") % \
|
||||
{'url': linkcheck.configuration.Url}))
|
||||
self.writeln(u"# "+(_("Write comments and bugs to %(email)s") % \
|
||||
{'email': linkcheck.configuration.Email}))
|
||||
self.writeln()
|
||||
self.writeln(u"graph [")
|
||||
self.writeln(u" directed 1")
|
||||
self.flush()
|
||||
|
||||
def new_url (self, url_data):
|
||||
|
|
@ -67,27 +62,19 @@ class GMLLogger (linkcheck.logger.Logger):
|
|||
node.id = self.nodeid
|
||||
self.nodes[node.url] = node
|
||||
self.nodeid += 1
|
||||
self.fd.write(" node [")
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write(" id %d" % node.id)
|
||||
self.fd.write(os.linesep)
|
||||
self.writeln(u" node [")
|
||||
self.writeln(u" id %d" % node.id)
|
||||
if self.has_field("realurl"):
|
||||
self.fd.write(' label "%s"' % node.url)
|
||||
self.fd.write(os.linesep)
|
||||
self.writeln(u' label "%s"' % node.url)
|
||||
if node.dltime >= 0 and self.has_field("dltime"):
|
||||
self.fd.write(" dltime %d" % node.dltime)
|
||||
self.fd.write(os.linesep)
|
||||
self.writeln(u" dltime %d" % node.dltime)
|
||||
if node.dlsize >= 0 and self.has_field("dlsize"):
|
||||
self.fd.write(" dlsize %d" % node.dlsize)
|
||||
self.fd.write(os.linesep)
|
||||
self.writeln(u" dlsize %d" % node.dlsize)
|
||||
if node.checktime and self.has_field("checktime"):
|
||||
self.fd.write(" checktime %d" % node.checktime)
|
||||
self.fd.write(os.linesep)
|
||||
self.writeln(u" checktime %d" % node.checktime)
|
||||
if self.has_field("extern"):
|
||||
self.fd.write(" extern %d" % (node.extern and 1 or 0))
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write(" ]")
|
||||
self.fd.write(os.linesep)
|
||||
self.writeln(u" extern %d" % (node.extern and 1 or 0))
|
||||
self.writeln(u" ]")
|
||||
self.write_edges()
|
||||
|
||||
def write_edges (self):
|
||||
|
|
@ -96,36 +83,28 @@ class GMLLogger (linkcheck.logger.Logger):
|
|||
"""
|
||||
for node in self.nodes.values():
|
||||
if self.nodes.has_key(node.parent_url):
|
||||
self.fd.write(" edge [")
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write(' label "%s"' % node.base_url)
|
||||
self.fd.write(os.linesep)
|
||||
self.writeln(u" edge [")
|
||||
self.writeln(u' label "%s"' % node.base_url)
|
||||
if self.has_field("parenturl"):
|
||||
self.fd.write(" source %d" % \
|
||||
self.nodes[node.parent_url].id)
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write(" target %d" % node.id)
|
||||
self.fd.write(os.linesep)
|
||||
self.writeln(u" source %d" % \
|
||||
self.nodes[node.parent_url].id)
|
||||
self.writeln(u" target %d" % node.id)
|
||||
if self.has_field("result"):
|
||||
self.fd.write(" valid %d" % (node.valid and 1 or 0))
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write(" ]")
|
||||
self.fd.write(os.linesep)
|
||||
self.writeln(u" valid %d" % (node.valid and 1 or 0))
|
||||
self.writeln(u" ]")
|
||||
self.flush()
|
||||
|
||||
def end_output (self, linknumber=-1):
|
||||
"""print end of checking info as gml comment"""
|
||||
if self.fd is None:
|
||||
return
|
||||
self.fd.write("]")
|
||||
self.fd.write(os.linesep)
|
||||
self.writeln(u"]")
|
||||
if self.has_field("outro"):
|
||||
self.stoptime = time.time()
|
||||
duration = self.stoptime - self.starttime
|
||||
self.fd.write("# "+_("Stopped checking at %s (%s)")%\
|
||||
(linkcheck.strformat.strtime(self.stoptime),
|
||||
linkcheck.strformat.strduration(duration)))
|
||||
self.fd.write(os.linesep)
|
||||
self.writeln(u"# "+_("Stopped checking at %s (%s)")%\
|
||||
(linkcheck.strformat.strtime(self.stoptime),
|
||||
linkcheck.strformat.strduration(duration)))
|
||||
self.flush()
|
||||
if self.close_fd:
|
||||
self.fd.close()
|
||||
|
|
|
|||
|
|
@ -29,11 +29,13 @@ def sqlify (s):
|
|||
return "NULL"
|
||||
return "'%s'" % s.replace("'", "''")
|
||||
|
||||
|
||||
def intify (s):
|
||||
if not s:
|
||||
return 0
|
||||
return 1
|
||||
|
||||
|
||||
class SQLLogger (linkcheck.logger.Logger):
|
||||
"""SQL output for PostgreSQL, not tested"""
|
||||
|
||||
|
|
@ -51,24 +53,21 @@ class SQLLogger (linkcheck.logger.Logger):
|
|||
return
|
||||
self.starttime = time.time()
|
||||
if self.has_field("intro"):
|
||||
self.fd.write("-- "+(_("created by %s at %s") % \
|
||||
(linkcheck.configuration.AppName,
|
||||
linkcheck.strformat.strtime(self.starttime))))
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write("-- "+(_("Get the newest version at %s") % \
|
||||
linkcheck.configuration.Url))
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write("-- "+(_("Write comments and bugs to %s") % \
|
||||
linkcheck.configuration.Email))
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write(os.linesep)
|
||||
self.writeln(u"-- "+(_("created by %s at %s") % \
|
||||
(linkcheck.configuration.AppName,
|
||||
linkcheck.strformat.strtime(self.starttime))))
|
||||
self.writeln(u"-- "+(_("Get the newest version at %s") % \
|
||||
linkcheck.configuration.Url))
|
||||
self.writeln(u"-- "+(_("Write comments and bugs to %s") % \
|
||||
linkcheck.configuration.Email))
|
||||
self.writeln()
|
||||
self.flush()
|
||||
|
||||
def new_url (self, url_data):
|
||||
"""store url check info into the database"""
|
||||
if self.fd is None:
|
||||
return
|
||||
self.fd.write("insert into %(table)s(urlname,recursionlevel,"
|
||||
self.writeln(u"insert into %(table)s(urlname,recursionlevel,"
|
||||
"parentname,baseref,valid,result,warning,info,url,line,col,"
|
||||
"name,checktime,dltime,dlsize,cached) values ("
|
||||
"%(base_url)s,"
|
||||
|
|
@ -107,7 +106,6 @@ class SQLLogger (linkcheck.logger.Logger):
|
|||
'cached': intify(url_data.cached),
|
||||
'separator': self.separator,
|
||||
})
|
||||
self.fd.write(os.linesep)
|
||||
self.flush()
|
||||
|
||||
def end_output (self, linknumber=-1):
|
||||
|
|
@ -117,10 +115,9 @@ class SQLLogger (linkcheck.logger.Logger):
|
|||
if self.has_field("outro"):
|
||||
self.stoptime = time.time()
|
||||
duration = self.stoptime - self.starttime
|
||||
self.fd.write("-- "+_("Stopped checking at %s (%s)")%\
|
||||
(linkcheck.strformat.strtime(self.stoptime),
|
||||
linkcheck.strformat.strduration(duration)))
|
||||
self.fd.write(os.linesep)
|
||||
self.writeln(u"-- "+_("Stopped checking at %s (%s)")%\
|
||||
(linkcheck.strformat.strtime(self.stoptime),
|
||||
linkcheck.strformat.strduration(duration)))
|
||||
self.flush()
|
||||
if self.close_fd:
|
||||
self.fd.close()
|
||||
|
|
|
|||
|
|
@ -69,63 +69,47 @@ class XMLLogger (linkcheck.logger.Logger):
|
|||
if self.fd is None:
|
||||
return
|
||||
self.starttime = time.time()
|
||||
self.fd.write('<?xml version="1.0"?>')
|
||||
self.fd.write(os.linesep)
|
||||
self.writeln(u'<?xml version="1.0"?>')
|
||||
if self.has_field("intro"):
|
||||
self.fd.write("<!--")
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write(" "+_("created by %s at %s") %
|
||||
(linkcheck.configuration.AppName,
|
||||
linkcheck.strformat.strtime(self.starttime)))
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write(" "+_("Get the newest version at %s") %
|
||||
linkcheck.configuration.Url)
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write(" "+_("Write comments and bugs to %s") %
|
||||
linkcheck.configuration.Email)
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write("-->")
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write('<GraphXML>\n<graph isDirected="true">')
|
||||
self.fd.write(os.linesep)
|
||||
self.writeln(u"<!--")
|
||||
self.writeln(u" "+_("created by %s at %s") %
|
||||
(linkcheck.configuration.AppName,
|
||||
linkcheck.strformat.strtime(self.starttime)))
|
||||
self.writeln(u" "+_("Get the newest version at %s") %
|
||||
linkcheck.configuration.Url)
|
||||
self.writeln(u" "+_("Write comments and bugs to %s") %
|
||||
linkcheck.configuration.Email)
|
||||
self.writeln(u"-->")
|
||||
self.writeln()
|
||||
self.writeln(u'<GraphXML>')
|
||||
self.writeln(u'<graph isDirected="true">')
|
||||
self.flush()
|
||||
|
||||
def new_url (self, url_data):
|
||||
"""write one node and all possible edges"""
|
||||
if self.fd is None: return
|
||||
if self.fd is None:
|
||||
return
|
||||
node = url_data
|
||||
if node.url and not self.nodes.has_key(node.url):
|
||||
node.id = self.nodeid
|
||||
self.nodes[node.url] = node
|
||||
self.nodeid += 1
|
||||
self.fd.write(' <node name="%d" ' % node.id)
|
||||
self.fd.write(">\n")
|
||||
self.writeln(u' <node name="%d">' % node.id)
|
||||
if self.has_field("realurl"):
|
||||
self.fd.write(" <label>%s</label>" % \
|
||||
xmlquote(node.url))
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write(" <data>")
|
||||
self.fd.write(os.linesep)
|
||||
self.writeln(u" <label>%s</label>" % xmlquote(node.url))
|
||||
self.writeln(u" <data>")
|
||||
if node.dltime >= 0 and self.has_field("dltime"):
|
||||
self.fd.write(" <dltime>%f</dltime>" % node.dltime)
|
||||
self.fd.write(os.linesep)
|
||||
self.writeln(u" <dltime>%f</dltime>" % node.dltime)
|
||||
if node.dlsize >= 0 and self.has_field("dlsize"):
|
||||
self.fd.write(" <dlsize>%d</dlsize>" % node.dlsize)
|
||||
self.fd.write(os.linesep)
|
||||
self.writeln(u" <dlsize>%d</dlsize>" % node.dlsize)
|
||||
if node.checktime and self.has_field("checktime"):
|
||||
self.fd.write(" <checktime>%f</checktime>" \
|
||||
% node.checktime)
|
||||
self.fd.write(os.linesep)
|
||||
self.writeln(u" <checktime>%f</checktime>" %
|
||||
node.checktime)
|
||||
if self.has_field("extern"):
|
||||
self.fd.write(" <extern>%d</extern>" % \
|
||||
(node.extern and 1 or 0))
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write(" </data>")
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write(" </node>")
|
||||
self.fd.write(os.linesep)
|
||||
self.writeln(u" <extern>%d</extern>" %
|
||||
(node.extern and 1 or 0))
|
||||
self.writeln(u" </data>")
|
||||
self.writeln(u" </node>")
|
||||
self.write_edges()
|
||||
|
||||
def write_edges (self):
|
||||
|
|
@ -134,26 +118,18 @@ class XMLLogger (linkcheck.logger.Logger):
|
|||
"""
|
||||
for node in self.nodes.values():
|
||||
if self.nodes.has_key(node.parent_url):
|
||||
self.fd.write(" <edge")
|
||||
self.fd.write(' source="%d"' % \
|
||||
self.nodes[node.parent_url].id)
|
||||
self.fd.write(' target="%d"' % node.id)
|
||||
self.fd.write(">")
|
||||
self.fd.write(os.linesep)
|
||||
self.write(u" <edge")
|
||||
self.write(u' source="%d"' % self.nodes[node.parent_url].id)
|
||||
self.writeln(u' target="%d">' % node.id)
|
||||
if self.has_field("url"):
|
||||
self.fd.write(" <label>%s</label>" % \
|
||||
xmlquote(node.base_url))
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write(" <data>")
|
||||
self.fd.write(os.linesep)
|
||||
self.writeln(u" <label>%s</label>" % \
|
||||
xmlquote(node.base_url))
|
||||
self.writeln(u" <data>")
|
||||
if self.has_field("result"):
|
||||
self.fd.write(" <valid>%d</valid>" % \
|
||||
(node.valid and 1 or 0))
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write(" </data>")
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write(" </edge>")
|
||||
self.fd.write(os.linesep)
|
||||
self.writeln(u" <valid>%d</valid>" % \
|
||||
(node.valid and 1 or 0))
|
||||
self.writeln(u" </data>")
|
||||
self.writeln(u" </edge>")
|
||||
self.flush()
|
||||
|
||||
def end_output (self, linknumber=-1):
|
||||
|
|
@ -162,19 +138,16 @@ class XMLLogger (linkcheck.logger.Logger):
|
|||
"""
|
||||
if self.fd is None:
|
||||
return
|
||||
self.fd.write("</graph>")
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write("</GraphXML>")
|
||||
self.fd.write(os.linesep)
|
||||
self.writeln(u"</graph>")
|
||||
self.writeln(u"</GraphXML>")
|
||||
if self.has_field("outro"):
|
||||
self.stoptime = time.time()
|
||||
duration = self.stoptime - self.starttime
|
||||
self.fd.write("<!-- ")
|
||||
self.fd.write(_("Stopped checking at %s (%s)") % \
|
||||
(linkcheck.strformat.strtime(self.stoptime),
|
||||
linkcheck.strformat.strduration(duration)))
|
||||
self.fd.write(os.linesep)
|
||||
self.fd.write("-->")
|
||||
self.writeln(u"<!-- ")
|
||||
self.writeln(_("Stopped checking at %s (%s)") % \
|
||||
(linkcheck.strformat.strtime(self.stoptime),
|
||||
linkcheck.strformat.strduration(duration)))
|
||||
self.writeln(u"-->")
|
||||
self.flush()
|
||||
if self.close_fd:
|
||||
self.fd.close()
|
||||
|
|
|
|||
Loading…
Reference in a new issue