# -*- coding: iso-8859-1 -*-
"""A parser for HTML"""
# Copyright (C) 2000-2003 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
import sys
# The parser itself lives in the `htmlsax` module; without it this module
# cannot do anything, so report the problem on stderr and exit with status 1.
try:
    import htmlsax
except ImportError:
    # sys.exc_info() is used to fetch the exception instance because it is
    # spelled the same way on every Python version; only the value is needed.
    value = sys.exc_info()[1]
    sys.stderr.write(
        "Could not import the parser module `htmlsax': %s\n" % (value,))
    sys.stderr.write("Please check your installation of LinkChecker.\n")
    sys.exit(1)
class HtmlParser:
    """Wrapper around the internal C SAX parser created by htmlsax.parser().

    The instance passes itself to htmlsax.parser() as the handler object.
    No callbacks are defined here, for compatibility. Currently recognized
    callbacks are:

        comment(data)
        startElement(tag, attrs)
        endElement(tag)
        doctype(data)
        pi(name, data=None)
        cdata(data)
        characters(data)

    Additionally, there are error and warning callbacks:

        error(msg)
        warning(msg)
        fatalError(msg)

    Any attribute that is not found on the instance or its class is looked
    up on the wrapped parser object instead.
    """

    def __init__(self):
        """Create the internal parser with this instance as its handler."""
        self.parser = htmlsax.parser(self)

    def __getattr__(self, name):
        """Delegate unknown attributes to self.parser.

        Raises AttributeError when `parser` itself has not been assigned
        yet, or when the wrapped parser has no attribute called `name`.
        """
        # __getattr__ only runs when normal lookup failed. If the missing
        # attribute is `parser`, reading self.parser below would re-enter
        # this method forever, so fail with a plain AttributeError instead.
        if name == "parser":
            raise AttributeError(name)
        return getattr(self.parser, name)
class HtmlPrinter (HtmlParser):
    """Handle every callback by printing its name and arguments.

    Attribute lookups that neither this class nor the wrapped parser can
    satisfy are answered with the _print method, after the requested name
    has been stored in self.mem. error(), warning() and fatalError() are
    defined explicitly and write to stderr.
    """

    def _print(self, *attrs):
        """Write the stored callback name and its arguments to stdout.

        The line consists of self.mem, the argument tuple, and the results
        of self.last_lineno() and self.last_column(), separated by spaces.
        """
        sys.stdout.write("%s %s %s %s\n" % (
            self.mem, attrs, self.last_lineno(), self.last_column()))

    def _errorfun(self, msg, name):
        """Write msg to stderr, prefixed with name and a position.

        The position is "<lineno>:<column>:" built from self.lineno() and
        self.column().
        """
        pos = "%d:%d:" % (self.lineno(), self.column())
        sys.stderr.write("%s %s %s\n" % (name, pos, msg))

    def error(self, msg):
        """Signal a filter/parser error."""
        self._errorfun(msg, "error:")

    def warning(self, msg):
        """Signal a filter/parser warning."""
        self._errorfun(msg, "warning:")

    def fatalError(self, msg):
        """Signal a fatal filter/parser error."""
        self._errorfun(msg, "fatal error:")

    def __getattr__(self, name):
        """Resolve attributes that normal lookup could not find.

        Attributes the wrapped parser has are returned from it. Any other
        name is remembered in self.mem and answered with self._print, so
        calling the result prints that name together with the arguments.
        Raises AttributeError when `parser` itself is not assigned yet.
        """
        # Reading self.parser while `parser` is missing would re-enter this
        # method without end; report the missing attribute instead.
        if name == "parser":
            raise AttributeError(name)
        if hasattr(self.parser, name):
            return getattr(self.parser, name)
        # Only one name is remembered: _print reports whichever unknown
        # attribute was looked up most recently.
        self.mem = name
        return self._print
def _test():
    """Feed twenty empty chunks to a fresh HtmlPrinter, then flush it."""
    # NOTE(review): every chunk is an empty string as written here;
    # presumably these calls once carried HTML snippets -- confirm.
    chunk_count = 20
    printer = HtmlPrinter()
    for _ in range(chunk_count):
        printer.feed("")
    printer.flush()
def _broken():
    """Feed a single newline chunk to a fresh HtmlPrinter, then flush it."""
    p = HtmlPrinter()
    # The newline is written as an escape: a single-quoted string literal
    # may not span physical lines.
    # NOTE(review): only a newline is fed as written here; the markup this
    # helper was meant to exercise appears to be missing -- confirm.
    p.feed("\n")
    p.flush()
if __name__ == "__main__":
    # Script entry point: only _broken() is run; _test() is left disabled.
    #_test()
    _broken()