From af5be26d2c84dcf7ecf968463b373bab8ead05cf Mon Sep 17 00:00:00 2001 From: calvin Date: Mon, 1 Mar 2004 15:38:56 +0000 Subject: [PATCH] use XmlUtils instead of xmlify for quoting git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1271 e7d03fd6-7b0d-0410-9947-9c21f3af8025 --- linkcheck/StringUtil.py | 26 ++------------------ linkcheck/XmlUtils.py | 50 ++++++++++++++++++++++++++++++++++++++ linkcheck/log/XMLLogger.py | 9 ++++--- 3 files changed, 57 insertions(+), 28 deletions(-) create mode 100644 linkcheck/XmlUtils.py diff --git a/linkcheck/StringUtil.py b/linkcheck/StringUtil.py index 0429f944..84a2015e 100644 --- a/linkcheck/StringUtil.py +++ b/linkcheck/StringUtil.py @@ -26,20 +26,6 @@ UnHtmlTable = [ ("&"+x[0]+";", x[1]) for x in entities ] HtmlTable.sort() UnHtmlTable.sort() UnHtmlTable.reverse() -# standard xml entities -entities = { - 'lt': '<', - 'gt': '>', - 'amp': '&', - 'quot': '"', - 'apos': "'", -}.items() -XmlTable = [ (x[1], "&"+x[0]+";") for x in entities ] -UnXmlTable = [ ("&"+x[0]+";", x[1]) for x in entities ] -# order matters! 
-XmlTable.sort() -UnXmlTable.sort() -UnXmlTable.reverse() SQLTable = [ ("'","''") @@ -143,16 +129,6 @@ def unhtmlify (s): return re.sub(r'(?i)&(?P#x?\d+|[a-z]+);', resolve_entity, s) -def xmlify (s): - """quote characters for XML""" - return applyTable(XmlTable, s) - - -def unxmlify (s): - """unquote character from XML""" - return applyTable(UnXmlTable, s) - - def getLineNumber (s, index): "return the line number of str[index]" i=0 @@ -205,9 +181,11 @@ def strsize (b): b /= 1024.0 return "%.2f GB" + def _test (): print unhtmlify('a') print unhtmlify('&') + if __name__=='__main__': _test() diff --git a/linkcheck/XmlUtils.py b/linkcheck/XmlUtils.py new file mode 100644 index 00000000..d8c7014c --- /dev/null +++ b/linkcheck/XmlUtils.py @@ -0,0 +1,50 @@ +# -*- coding: iso-8859-1 -*- +"""XML utility functions""" +# Copyright (C) 2003-2004 Bastian Kleineidam +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# This program is distributed in the hope that it will be useful, +# but WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program; if not, write to the Free Software +# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. 
+ +__version__ = "$Revision$"[11:-2] +__date__ = "$Date$"[7:-2] + +from xml.sax.saxutils import escape, unescape + +attr_entities = { + "&": "&amp;", + "<": "&lt;", + ">": "&gt;", + "\"": "&quot;", +} + + +def xmlquote (s): + """quote characters for XML""" + return escape(s) + + +def xmlquoteattr (s): + """quote XML attribute, ready for inclusion with double quotes""" + return escape(s, attr_entities) + + +def xmlunquote (s): + """unquote characters from XML""" + return unescape(s) + + +def xmlunquoteattr (s): + """unquote attributes from XML""" + return unescape(s, attr_entities) + diff --git a/linkcheck/log/XMLLogger.py b/linkcheck/log/XMLLogger.py index dd99ba8b..db020384 100644 --- a/linkcheck/log/XMLLogger.py +++ b/linkcheck/log/XMLLogger.py @@ -17,7 +17,7 @@ import time from linkcheck import Config, i18n -from linkcheck.StringUtil import xmlify +from linkcheck.XmlUtils import xmlquote from linkcheck.log import strtime, strduration from linkcheck.url import url_quote from StandardLogger import StandardLogger @@ -61,7 +61,7 @@ class XMLLogger (StandardLogger): self.fd.write(">\n") if self.has_field("realurl"): self.fd.write(" \n" %\ - xmlify(url_quote(node.url))) + xmlquote(url_quote(node.url))) self.fd.write(" \n") if node.dltime>=0 and self.has_field("dltime"): self.fd.write(" %f\n" % node.dltime) @@ -90,11 +90,12 @@ class XMLLogger (StandardLogger): self.fd.write(' target="%d"' % node.id) self.fd.write(">\n") if self.has_field("url"): - self.fd.write(" \n" % xmlify(node.urlName)) + self.fd.write(" \n" % \ + xmlquote(node.urlName)) self.fd.write(" \n") if self.has_field("result"): self.fd.write(" %d\n" % \ - (node.valid and 1 or 0)) + (node.valid and 1 or 0)) self.fd.write(" \n") self.fd.write(" \n") self.fd.flush()