Unicode fixes

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1899 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2004-10-27 22:34:50 +00:00
parent abfd6c475d
commit de68f20f89
5 changed files with 126 additions and 175 deletions

View file

@ -25,6 +25,7 @@ except ImportError:
import linkcheck.threader
import linkcheck.log
from urlbase import stderr
class Consumer (object):
"""consume urls from the url queue in a threaded manner"""
@ -129,9 +130,9 @@ class Consumer (object):
links = self.linknumber
tocheck = self.cache.incoming_len()
duration = linkcheck.strformat.strduration(curtime - start_time)
print >> sys.stderr, _("Status: %5d URLs queued, "\
"%4d URLs checked, %2d active threads, runtime %s")\
% (tocheck, links, active, duration)
print >> stderr, _("Status: %5d URLs queued, "
"%4d URLs checked, %2d active threads, runtime %s") % \
(tocheck, links, active, duration)
finally:
self.lock.release()

View file

@ -33,7 +33,6 @@ class CSVLogger (linkcheck.logger.Logger):
super(CSVLogger, self).__init__(**args)
self.init_fileoutput(args)
self.separator = args['separator']
self.lineterminator = os.linesep
def start_output (self):
"""print checking start info as csv comment"""
@ -41,43 +40,42 @@ class CSVLogger (linkcheck.logger.Logger):
if self.fd is None:
return
self.starttime = time.time()
sep = os.linesep
if self.has_field("intro"):
self.fd.write("# "+(_("created by %s at %s") % \
(linkcheck.configuration.AppName,
linkcheck.strformat.strtime(self.starttime))))
self.fd.write(self.lineterminator)
self.fd.write("# "+(_("Get the newest version at %(url)s") % \
{'url': linkcheck.configuration.Url}))
self.fd.write(self.lineterminator)
self.fd.write("# "+(_("Write comments and bugs to %(email)s") % \
{'email': linkcheck.configuration.Email}))
self.fd.write(self.lineterminator)
self.fd.write(_("# Format of the entries:")+self.lineterminator+
"# urlname;"+self.lineterminator+
"# recursionlevel;"+self.lineterminator+
"# parentname;"+self.lineterminator+
"# baseref;"+self.lineterminator+
"# result;"+self.lineterminator+
"# warningstring;"+self.lineterminator+
"# infostring;"+self.lineterminator+
"# valid;"+self.lineterminator+
"# url;"+self.lineterminator+
"# line;"+self.lineterminator+
"# column;"+self.lineterminator+
"# name;"+self.lineterminator+
"# dltime;"+self.lineterminator+
"# dlsize;"+self.lineterminator+
"# checktime;"+self.lineterminator+
"# cached;"+self.lineterminator)
self.writeln(u"# "+(_("created by %s at %s") % \
(linkcheck.configuration.AppName,
linkcheck.strformat.strtime(self.starttime))))
self.writeln(u"# "+(_("Get the newest version at %(url)s") % \
{'url': linkcheck.configuration.Url}))
self.writeln(u"# "+(_("Write comments and bugs to %(email)s") % \
{'email': linkcheck.configuration.Email}))
self.writeln(_("# Format of the entries:")+sep+
u"# urlname;"+sep+
u"# recursionlevel;"+sep+
u"# parentname;"+sep+
u"# baseref;"+sep+
u"# result;"+sep+
u"# warningstring;"+sep+
u"# infostring;"+sep+
u"# valid;"+sep+
u"# url;"+sep+
u"# line;"+sep+
u"# column;"+sep+
u"# name;"+sep+
u"# dltime;"+sep+
u"# dlsize;"+sep+
u"# checktime;"+sep+
u"# cached;")
self.flush()
self.writer = csv.writer(self.fd, dialect='excel',
delimiter=self.separator, lineterminator=self.lineterminator)
delimiter=self.separator, lineterminator=sep)
def new_url (self, url_data):
"""print csv formatted url check info"""
if self.fd is None:
return
row = [url_data.base_url, url_data.recursion_level,
row = []
for s in [url_data.base_url, url_data.recursion_level,
url_data.parent_url or "", url_data.base_ref or "",
url_data.result,
os.linesep.join(url_data.warning),
@ -86,7 +84,11 @@ class CSVLogger (linkcheck.logger.Logger):
url_data.line, url_data.column,
url_data.name, url_data.dltime,
url_data.dlsize, url_data.checktime,
url_data.cached]
url_data.cached]:
if isinstance(s, unicode):
row.append(s.encode(self.output_encoding, "ignore"))
else:
row.append(s)
self.writer.writerow(row)
self.flush()
@ -97,10 +99,9 @@ class CSVLogger (linkcheck.logger.Logger):
self.stoptime = time.time()
if self.has_field("outro"):
duration = self.stoptime - self.starttime
self.fd.write("# "+_("Stopped checking at %s (%s)%s")%\
(linkcheck.strformat.strtime(self.stoptime),
linkcheck.strformat.strduration(duration),
self.lineterminator))
self.writeln(u"# "+_("Stopped checking at %s (%s)") % \
(linkcheck.strformat.strtime(self.stoptime),
linkcheck.strformat.strduration(duration)))
self.flush()
if self.close_fd:
self.fd.close()

View file

@ -41,21 +41,16 @@ class GMLLogger (linkcheck.logger.Logger):
return
self.starttime = time.time()
if self.has_field("intro"):
self.fd.write("# "+(_("created by %s at %s") % \
(linkcheck.configuration.AppName,
linkcheck.strformat.strtime(self.starttime))))
self.fd.write(os.linesep)
self.fd.write("# "+(_("Get the newest version at %(url)s") %\
{'url': linkcheck.configuration.Url}))
self.fd.write(os.linesep)
self.fd.write("# "+(_("Write comments and bugs to %(email)s") % \
{'email': linkcheck.configuration.Email}))
self.fd.write(os.linesep)
self.fd.write(os.linesep)
self.fd.write("graph [")
self.fd.write(os.linesep)
self.fd.write(" directed 1")
self.fd.write(os.linesep)
self.writeln(u"# "+(_("created by %s at %s") % \
(linkcheck.configuration.AppName,
linkcheck.strformat.strtime(self.starttime))))
self.writeln(u"# "+(_("Get the newest version at %(url)s") % \
{'url': linkcheck.configuration.Url}))
self.writeln(u"# "+(_("Write comments and bugs to %(email)s") % \
{'email': linkcheck.configuration.Email}))
self.writeln()
self.writeln(u"graph [")
self.writeln(u" directed 1")
self.flush()
def new_url (self, url_data):
@ -67,27 +62,19 @@ class GMLLogger (linkcheck.logger.Logger):
node.id = self.nodeid
self.nodes[node.url] = node
self.nodeid += 1
self.fd.write(" node [")
self.fd.write(os.linesep)
self.fd.write(" id %d" % node.id)
self.fd.write(os.linesep)
self.writeln(u" node [")
self.writeln(u" id %d" % node.id)
if self.has_field("realurl"):
self.fd.write(' label "%s"' % node.url)
self.fd.write(os.linesep)
self.writeln(u' label "%s"' % node.url)
if node.dltime >= 0 and self.has_field("dltime"):
self.fd.write(" dltime %d" % node.dltime)
self.fd.write(os.linesep)
self.writeln(u" dltime %d" % node.dltime)
if node.dlsize >= 0 and self.has_field("dlsize"):
self.fd.write(" dlsize %d" % node.dlsize)
self.fd.write(os.linesep)
self.writeln(u" dlsize %d" % node.dlsize)
if node.checktime and self.has_field("checktime"):
self.fd.write(" checktime %d" % node.checktime)
self.fd.write(os.linesep)
self.writeln(u" checktime %d" % node.checktime)
if self.has_field("extern"):
self.fd.write(" extern %d" % (node.extern and 1 or 0))
self.fd.write(os.linesep)
self.fd.write(" ]")
self.fd.write(os.linesep)
self.writeln(u" extern %d" % (node.extern and 1 or 0))
self.writeln(u" ]")
self.write_edges()
def write_edges (self):
@ -96,36 +83,28 @@ class GMLLogger (linkcheck.logger.Logger):
"""
for node in self.nodes.values():
if self.nodes.has_key(node.parent_url):
self.fd.write(" edge [")
self.fd.write(os.linesep)
self.fd.write(' label "%s"' % node.base_url)
self.fd.write(os.linesep)
self.writeln(u" edge [")
self.writeln(u' label "%s"' % node.base_url)
if self.has_field("parenturl"):
self.fd.write(" source %d" % \
self.nodes[node.parent_url].id)
self.fd.write(os.linesep)
self.fd.write(" target %d" % node.id)
self.fd.write(os.linesep)
self.writeln(u" source %d" % \
self.nodes[node.parent_url].id)
self.writeln(u" target %d" % node.id)
if self.has_field("result"):
self.fd.write(" valid %d" % (node.valid and 1 or 0))
self.fd.write(os.linesep)
self.fd.write(" ]")
self.fd.write(os.linesep)
self.writeln(u" valid %d" % (node.valid and 1 or 0))
self.writeln(u" ]")
self.flush()
def end_output (self, linknumber=-1):
"""print end of checking info as gml comment"""
if self.fd is None:
return
self.fd.write("]")
self.fd.write(os.linesep)
self.writeln(u"]")
if self.has_field("outro"):
self.stoptime = time.time()
duration = self.stoptime - self.starttime
self.fd.write("# "+_("Stopped checking at %s (%s)")%\
(linkcheck.strformat.strtime(self.stoptime),
linkcheck.strformat.strduration(duration)))
self.fd.write(os.linesep)
self.writeln(u"# "+_("Stopped checking at %s (%s)")%\
(linkcheck.strformat.strtime(self.stoptime),
linkcheck.strformat.strduration(duration)))
self.flush()
if self.close_fd:
self.fd.close()

View file

@ -29,11 +29,13 @@ def sqlify (s):
return "NULL"
return "'%s'" % s.replace("'", "''")
def intify (s):
    """Collapse the truth value of s to an integer flag.

    Returns 0 when s is false in a boolean context (None, empty
    string, zero, empty container, ...) and 1 otherwise, so the
    result can be interpolated with a numeric format such as %d.
    """
    # Falsy values map to 0; everything else is reported as 1.
    if not s:
        return 0
    return 1
class SQLLogger (linkcheck.logger.Logger):
"""SQL output for PostgreSQL, not tested"""
@ -51,24 +53,21 @@ class SQLLogger (linkcheck.logger.Logger):
return
self.starttime = time.time()
if self.has_field("intro"):
self.fd.write("-- "+(_("created by %s at %s") % \
(linkcheck.configuration.AppName,
linkcheck.strformat.strtime(self.starttime))))
self.fd.write(os.linesep)
self.fd.write("-- "+(_("Get the newest version at %s") % \
linkcheck.configuration.Url))
self.fd.write(os.linesep)
self.fd.write("-- "+(_("Write comments and bugs to %s") % \
linkcheck.configuration.Email))
self.fd.write(os.linesep)
self.fd.write(os.linesep)
self.writeln(u"-- "+(_("created by %s at %s") % \
(linkcheck.configuration.AppName,
linkcheck.strformat.strtime(self.starttime))))
self.writeln(u"-- "+(_("Get the newest version at %s") % \
linkcheck.configuration.Url))
self.writeln(u"-- "+(_("Write comments and bugs to %s") % \
linkcheck.configuration.Email))
self.writeln()
self.flush()
def new_url (self, url_data):
"""store url check info into the database"""
if self.fd is None:
return
self.fd.write("insert into %(table)s(urlname,recursionlevel,"
self.writeln(u"insert into %(table)s(urlname,recursionlevel,"
"parentname,baseref,valid,result,warning,info,url,line,col,"
"name,checktime,dltime,dlsize,cached) values ("
"%(base_url)s,"
@ -107,7 +106,6 @@ class SQLLogger (linkcheck.logger.Logger):
'cached': intify(url_data.cached),
'separator': self.separator,
})
self.fd.write(os.linesep)
self.flush()
def end_output (self, linknumber=-1):
@ -117,10 +115,9 @@ class SQLLogger (linkcheck.logger.Logger):
if self.has_field("outro"):
self.stoptime = time.time()
duration = self.stoptime - self.starttime
self.fd.write("-- "+_("Stopped checking at %s (%s)")%\
(linkcheck.strformat.strtime(self.stoptime),
linkcheck.strformat.strduration(duration)))
self.fd.write(os.linesep)
self.writeln(u"-- "+_("Stopped checking at %s (%s)")%\
(linkcheck.strformat.strtime(self.stoptime),
linkcheck.strformat.strduration(duration)))
self.flush()
if self.close_fd:
self.fd.close()

View file

@ -69,63 +69,47 @@ class XMLLogger (linkcheck.logger.Logger):
if self.fd is None:
return
self.starttime = time.time()
self.fd.write('<?xml version="1.0"?>')
self.fd.write(os.linesep)
self.writeln(u'<?xml version="1.0"?>')
if self.has_field("intro"):
self.fd.write("<!--")
self.fd.write(os.linesep)
self.fd.write(" "+_("created by %s at %s") %
(linkcheck.configuration.AppName,
linkcheck.strformat.strtime(self.starttime)))
self.fd.write(os.linesep)
self.fd.write(" "+_("Get the newest version at %s") %
linkcheck.configuration.Url)
self.fd.write(os.linesep)
self.fd.write(" "+_("Write comments and bugs to %s") %
linkcheck.configuration.Email)
self.fd.write(os.linesep)
self.fd.write(os.linesep)
self.fd.write("-->")
self.fd.write(os.linesep)
self.fd.write(os.linesep)
self.fd.write('<GraphXML>\n<graph isDirected="true">')
self.fd.write(os.linesep)
self.writeln(u"<!--")
self.writeln(u" "+_("created by %s at %s") %
(linkcheck.configuration.AppName,
linkcheck.strformat.strtime(self.starttime)))
self.writeln(u" "+_("Get the newest version at %s") %
linkcheck.configuration.Url)
self.writeln(u" "+_("Write comments and bugs to %s") %
linkcheck.configuration.Email)
self.writeln(u"-->")
self.writeln()
self.writeln(u'<GraphXML>')
self.writeln(u'<graph isDirected="true">')
self.flush()
def new_url (self, url_data):
"""write one node and all possible edges"""
if self.fd is None: return
if self.fd is None:
return
node = url_data
if node.url and not self.nodes.has_key(node.url):
node.id = self.nodeid
self.nodes[node.url] = node
self.nodeid += 1
self.fd.write(' <node name="%d" ' % node.id)
self.fd.write(">\n")
self.writeln(u' <node name="%d">' % node.id)
if self.has_field("realurl"):
self.fd.write(" <label>%s</label>" % \
xmlquote(node.url))
self.fd.write(os.linesep)
self.fd.write(" <data>")
self.fd.write(os.linesep)
self.writeln(u" <label>%s</label>" % xmlquote(node.url))
self.writeln(u" <data>")
if node.dltime >= 0 and self.has_field("dltime"):
self.fd.write(" <dltime>%f</dltime>" % node.dltime)
self.fd.write(os.linesep)
self.writeln(u" <dltime>%f</dltime>" % node.dltime)
if node.dlsize >= 0 and self.has_field("dlsize"):
self.fd.write(" <dlsize>%d</dlsize>" % node.dlsize)
self.fd.write(os.linesep)
self.writeln(u" <dlsize>%d</dlsize>" % node.dlsize)
if node.checktime and self.has_field("checktime"):
self.fd.write(" <checktime>%f</checktime>" \
% node.checktime)
self.fd.write(os.linesep)
self.writeln(u" <checktime>%f</checktime>" %
node.checktime)
if self.has_field("extern"):
self.fd.write(" <extern>%d</extern>" % \
(node.extern and 1 or 0))
self.fd.write(os.linesep)
self.fd.write(" </data>")
self.fd.write(os.linesep)
self.fd.write(" </node>")
self.fd.write(os.linesep)
self.writeln(u" <extern>%d</extern>" %
(node.extern and 1 or 0))
self.writeln(u" </data>")
self.writeln(u" </node>")
self.write_edges()
def write_edges (self):
@ -134,26 +118,18 @@ class XMLLogger (linkcheck.logger.Logger):
"""
for node in self.nodes.values():
if self.nodes.has_key(node.parent_url):
self.fd.write(" <edge")
self.fd.write(' source="%d"' % \
self.nodes[node.parent_url].id)
self.fd.write(' target="%d"' % node.id)
self.fd.write(">")
self.fd.write(os.linesep)
self.write(u" <edge")
self.write(u' source="%d"' % self.nodes[node.parent_url].id)
self.writeln(u' target="%d">' % node.id)
if self.has_field("url"):
self.fd.write(" <label>%s</label>" % \
xmlquote(node.base_url))
self.fd.write(os.linesep)
self.fd.write(" <data>")
self.fd.write(os.linesep)
self.writeln(u" <label>%s</label>" % \
xmlquote(node.base_url))
self.writeln(u" <data>")
if self.has_field("result"):
self.fd.write(" <valid>%d</valid>" % \
(node.valid and 1 or 0))
self.fd.write(os.linesep)
self.fd.write(" </data>")
self.fd.write(os.linesep)
self.fd.write(" </edge>")
self.fd.write(os.linesep)
self.writeln(u" <valid>%d</valid>" % \
(node.valid and 1 or 0))
self.writeln(u" </data>")
self.writeln(u" </edge>")
self.flush()
def end_output (self, linknumber=-1):
@ -162,19 +138,16 @@ class XMLLogger (linkcheck.logger.Logger):
"""
if self.fd is None:
return
self.fd.write("</graph>")
self.fd.write(os.linesep)
self.fd.write("</GraphXML>")
self.fd.write(os.linesep)
self.writeln(u"</graph>")
self.writeln(u"</GraphXML>")
if self.has_field("outro"):
self.stoptime = time.time()
duration = self.stoptime - self.starttime
self.fd.write("<!-- ")
self.fd.write(_("Stopped checking at %s (%s)") % \
(linkcheck.strformat.strtime(self.stoptime),
linkcheck.strformat.strduration(duration)))
self.fd.write(os.linesep)
self.fd.write("-->")
self.writeln(u"<!-- ")
self.writeln(_("Stopped checking at %s (%s)") % \
(linkcheck.strformat.strtime(self.stoptime),
linkcheck.strformat.strduration(duration)))
self.writeln(u"-->")
self.flush()
if self.close_fd:
self.fd.close()