diff --git a/linkcheck/HtmlParser/htmllib.py b/linkcheck/HtmlParser/htmllib.py
index 0a8980e3..2a1dfc2b 100644
--- a/linkcheck/HtmlParser/htmllib.py
+++ b/linkcheck/HtmlParser/htmllib.py
@@ -99,7 +99,7 @@ class HtmlPrettyPrinter (object):
@type attrs: dict
@return: None
"""
- self._start_element(tag, attrs, ">")
+ self._start_element(tag, attrs, u">")
def start_end_element (self, tag, attrs, element_text=None):
"""
@@ -111,7 +111,7 @@ class HtmlPrettyPrinter (object):
@type attrs: dict
@return: None
"""
- self._start_element(tag, attrs, "/>")
+ self._start_element(tag, attrs, u"/>")
def _start_element (self, tag, attrs, end):
"""
@@ -125,12 +125,12 @@ class HtmlPrettyPrinter (object):
@type end: string
@return: None
"""
- self.fd.write("<%s" % tag.replace("/", ""))
+ self.fd.write(u"<%s" % tag.replace("/", ""))
for key, val in attrs.items():
if val is None:
- self.fd.write(" %s" % key)
+ self.fd.write(u" %s" % key)
else:
- self.fd.write(' %s="%s"' % (key, quote_attrval(val)))
+ self.fd.write(u' %s="%s"' % (key, quote_attrval(val)))
self.fd.write(end)
def end_element (self, tag):
diff --git a/tests/test_parser.py b/tests/test_parser.py
index 4465c19b..054d73e5 100644
--- a/tests/test_parser.py
+++ b/tests/test_parser.py
@@ -1,4 +1,4 @@
-# -*- coding: iso-8859-1 -*-
+# -*- coding: utf8 -*-
# Copyright (C) 2004-2012 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
@@ -54,9 +54,9 @@ parsetests = [
("""< a >""", """< a >"""),
("""<>""", """<>"""),
("""< >""", """< >"""),
- ("""""", u""""""),
- ("""""", u""""""),
- ("""""", u""""""),
+ ("""""", u""""""),
+ ("""""", u""""""),
+ ("""""", u""""""),
# multiple attribute names should be ignored...
("""""", """"""),
# ... but which one wins - in our implementation the last one
@@ -97,7 +97,7 @@ parsetests = [
("""""", """"""),
("""< / a>""", """< / a>"""),
("""< /a>""", """< /a>"""),
- ("""""", """"""),
+ ("""""", """"""),
# start and end tag (HTML doctype assumed)
("""""", """"""),
("""""", """"""),
@@ -144,8 +144,8 @@ parsetests = [
# note that \u8156 is not valid encoding and therefore gets removed
("""""", """"""),
# non-ascii characters
- ("""<Üzgür> fahr ¿¿¿¿¿¿{""",
- u"""<Üzgür> fahr ¿¿¿¿¿¿{"""),
+ ("""<Üzgür> fahr żżżżżż{""",
+ u"""<Üzgür> fahr żżżżżż{"""),
# mailto link
("""1""",
"""1"""),