# -*- coding: iso-8859-1 -*-
import bk.HtmlParser
import bk.HtmlParser.htmlsax
import bk.HtmlParser.htmllib
import cStringIO as StringIO
import unittest
# Table of parser test cases.  The test methods in this file iterate it as
# ``for _in, _out in parsetests``: the first item of an entry is fed to the
# parser and the second item is the text the HtmlPrettyPrinter handler is
# expected to have written once the parser has been flushed.
# NOTE(review): many entries below are empty or identical pairs; presumably
# the markup they contained was lost in this copy -- verify against upstream.
parsetests = [
# start tags
("""""", """"""),
("""""", """"""),
("""""", """"""),
("""""", """"""),
("""""", """"""),
("""""", """"""),
("""""", """"""),
("""""", """"""),
("""""", """"""),
("""""", """"""),
("""""", """"""),
("""""", """"""),
("""""", """"""),
("""""", """"""),
("""""", """"""),
("""""", """"""),
("""< a>""", """"""),
("""< a >""", """"""),
("""<>""", """<>"""),
("""< >""", """< >"""),
# reduce test
("""<""", """<"""),
("""d>""", """d>"""),
# numbers in tag
("""bla
""", """bla
"""),
# more start tags
("""""", """"""),
("""""", """"""),
("""
""", """
"""),
("""
""", """
"""),
("""
""", """
"""),
# comments
("""< 1>""", """<1>"""),
("""< 2>""", """<2>"""),
("""< 3>""", """<3>"""),
("""< 4>""", """<4>"""),
("""< 5>""", """<5>"""),
# NOTE(review): the next entry is a parenthesized string, not an
# (input, expected) pair, so the two-name unpacking in the test loops
# cannot succeed on it -- confirm the intended data.
("""<6>"""),
("""< 7>""", """<7>"""),
# NOTE(review): same problem as above, a bare string instead of a pair.
("""<8>"""),
("""""", """"""),
("""< 9>""", """<9>"""),
("""< 10>""", """<10>"""),
# end tags
("""""", """"""),
(""" a>""", """"""),
(""" a >""", """"""),
("""""", """"""),
("""< / a>""", """"""),
("""< /a>""", """"""),
# missing > in end tag
("""""", """
"""),
# start and end tag
("""""", """"""),
# declaration tags
("""""", """"""),
# misc
("""""", """"""),
# javascript
("""""", """"""),
("""""", """"""),
# line continuation (Dr. Fun webpage)
(" ", """ """),
# href with $
("""""", """"""),
# quoting
("""""", """"""),
("""""", """"""),
("""""", """"""),
("""""", """"""),
("""""", """"""),
("""""", """"""),
("""""", """"""),
("""""",
""""""),
("""""",
""""""),
# entities
("""""", """"""),
# non-ascii characters
("""<Üzgür> fahr ¹²³¼½¬{""",
"""<Üzgür> fahr ¹²³¼½¬{"""),
]
flushtests = [
("<", "<"),
(", )
self.htmlparser = bk.HtmlParser.htmlsax.parser()
self.htmlparser2 = bk.HtmlParser.htmlsax.parser()
def test_parse (self):
    """Feed every parsetests input in one piece and compare the output.

    For each (input, expected) pair a fresh buffer is wrapped in an
    HtmlPrettyPrinter and installed as the parser's handler; the input is
    fed with a single feed() call, the parser is flushed, and the buffer
    content must equal the expected text.  The parser is reset before the
    next pair.
    """
    parser = self.htmlparser
    for markup, expected in parsetests:
        sink = StringIO.StringIO()
        parser.handler = bk.HtmlParser.htmllib.HtmlPrettyPrinter(sink)
        parser.feed(markup)
        parser.flush()
        self.assertEqual(sink.getvalue(), expected)
        parser.reset()
def test_feed (self):
    """Feed every parsetests input one character at a time.

    Same comparison as the whole-string test, except that each input is
    handed to feed() character by character before the parser is flushed.
    The printed output must still equal the expected text.
    """
    parser = self.htmlparser
    for markup, expected in parsetests:
        sink = StringIO.StringIO()
        parser.handler = bk.HtmlParser.htmllib.HtmlPrettyPrinter(sink)
        for char in markup:
            parser.feed(char)
        parser.flush()
        self.assertEqual(sink.getvalue(), expected)
        parser.reset()
def test_interwoven (self):
    """Drive two parser instances in lockstep, one character at a time.

    Each parsetests input is fed character by character, alternating
    between self.htmlparser and self.htmlparser2.  Every parser prints
    through its own HtmlPrettyPrinter into its own buffer, and after
    flushing, both buffers must equal the expected text.
    """
    first = self.htmlparser
    second = self.htmlparser2
    for markup, expected in parsetests:
        sink = StringIO.StringIO()
        sink2 = StringIO.StringIO()
        first.handler = bk.HtmlParser.htmllib.HtmlPrettyPrinter(sink)
        second.handler = bk.HtmlParser.htmllib.HtmlPrettyPrinter(sink2)
        for char in markup:
            first.feed(char)
            second.feed(char)
        first.flush()
        second.flush()
        self.assertEqual(sink.getvalue(), expected)
        self.assertEqual(sink2.getvalue(), expected)
        # Reset both parsers before the next case.  Only the first parser
        # used to be reset here, so the second one carried whatever state
        # it had into the following iteration.
        first.reset()
        second.reset()
def test_flush (self):
    """Feed every flushtests input whole, flush, and compare the output.

    Works like the whole-string parse test but iterates the flushtests
    table: the buffer content after flush() must equal the expected text,
    and the parser is reset before the next pair.
    """
    parser = self.htmlparser
    for markup, expected in flushtests:
        sink = StringIO.StringIO()
        parser.handler = bk.HtmlParser.htmllib.HtmlPrettyPrinter(sink)
        parser.feed(markup)
        parser.flush()
        self.assertEqual(sink.getvalue(), expected)
        parser.reset()
def test_entities (self):
    """Check that decimal character references for a-z resolve to letters.

    For each lowercase ASCII letter, the reference ``&#<ordinal>;`` is
    passed to bk.HtmlParser.resolve_entities and the result must be that
    letter.
    """
    for c in "abcdefghijklmnopqrstuvwxyz":
        # A numeric character reference needs the "&#" prefix.  The bare
        # "<ordinal>;" text used before (e.g. "97;") is not an entity, so
        # it could never be expected to resolve to the letter.
        reference = "&#%d;" % ord(c)
        self.assertEqual(bk.HtmlParser.resolve_entities(reference), c)
def test_suite ():
    """Return a TestSuite wrapping all test methods of TestParser."""
    parser_tests = unittest.TestLoader().loadTestsFromTestCase(TestParser)
    return unittest.TestSuite([parser_tests])
# Run the tests of this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
|