From a3e4780b38a2302d2f70f3105c7fe7c5a7327118 Mon Sep 17 00:00:00 2001
From: calvin
Date: Thu, 2 Feb 2006 21:40:57 +0000
Subject: [PATCH] quote attributes with unicode entity escapes

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@3039 e7d03fd6-7b0d-0410-9947-9c21f3af8025
---
 linkcheck/HtmlParser/htmllib.py | 16 +++++++++++++---
 1 file changed, 13 insertions(+), 3 deletions(-)

diff --git a/linkcheck/HtmlParser/htmllib.py b/linkcheck/HtmlParser/htmllib.py
index f116435f..819d4fa9 100644
--- a/linkcheck/HtmlParser/htmllib.py
+++ b/linkcheck/HtmlParser/htmllib.py
@@ -199,9 +199,19 @@ def quote_attrval (s):
     @return: the quoted HTML attribute
     @rtype: string
     """
-    s = s.replace('&', "&amp;")
-    s = s.replace('"', "&quot;")
-    return s
+    res = []
+    for c in s:
+        if ord(c) <= 127:
+            # ASCII
+            if c == u'&':
+                res.append(u"&amp;")
+            elif c == u'"':
+                res.append(u"&quot;")
+            else:
+                res.append(c)
+        else:
+            res.append(u"&#%d;" % ord(c))
+    return u"".join(res)
 
 
 def quote_val (s):