Merge Mark-Hetherington-xml-parse-warn with slight modifications.

This commit is contained in:
Bastian Kleineidam 2014-06-13 20:50:37 +02:00
parent 7d9f0b477d
commit ad8eb424f3
3 changed files with 9 additions and 3 deletions

View file

@@ -100,6 +100,7 @@ WARN_IGNORE_URL = "ignore-url"
WARN_MAIL_NO_MX_HOST = "mail-no-mx-host"
WARN_NNTP_NO_SERVER = "nntp-no-server"
WARN_NNTP_NO_NEWSGROUP = "nntp-no-newsgroup"
WARN_XML_PARSE_ERROR = "xml-parse-error"
# registered warnings
Warnings = {
@@ -123,4 +124,5 @@ Warnings = {
WARN_NNTP_NO_SERVER: _("No NNTP server was found."),
WARN_NNTP_NO_NEWSGROUP: _("The NNTP newsgroup could not be found."),
WARN_URL_OBFUSCATED_IP: _("The IP is obfuscated."),
WARN_XML_PARSE_ERROR: _("XML could not be parsed."),
}

View file

@@ -59,6 +59,7 @@ def check_url(url_data, logger):
# redirect aliases
cache.add_result(alias, result)
# parse content recursively
# XXX this could add new warnings which should be cached.
if do_parse:
parser.parse_url(url_data)
finally:

View file

@@ -18,7 +18,8 @@
Main functions for link parsing
"""
from xml.parsers.expat import ParserCreate
from xml.parsers.expat import ExpatError
from ..checker.const import (WARN_XML_PARSE_ERROR)
class XmlTagUrlParser(object):
"""Parse XML files and find URLs in text content of a tag name."""
@@ -40,8 +41,10 @@ class XmlTagUrlParser(object):
self.url = u""
data = url_data.get_content()
isfinal = True
self.parser.Parse(data, isfinal)
try:
self.parser.Parse(data, isfinal)
except ExpatError as expaterr:
self.url_data.add_warning(expaterr.message,tag=WARN_XML_PARSE_ERROR)
def start_element(self, name, attrs):
"""Set tag status for start element."""
self.in_tag = (name == self.tag)