mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-10 23:53:11 +00:00
Merge Mark-Hetherington-xml-parse-warn with slight modifications.
This commit is contained in:
parent
7d9f0b477d
commit
ad8eb424f3
3 changed files with 9 additions and 3 deletions
|
|
@@ -100,6 +100,7 @@ WARN_IGNORE_URL = "ignore-url"
|
|||
WARN_MAIL_NO_MX_HOST = "mail-no-mx-host"
|
||||
WARN_NNTP_NO_SERVER = "nntp-no-server"
|
||||
WARN_NNTP_NO_NEWSGROUP = "nntp-no-newsgroup"
|
||||
WARN_XML_PARSE_ERROR = "xml-parse-error"
|
||||
|
||||
# registered warnings
|
||||
Warnings = {
|
||||
|
|
@@ -123,4 +124,5 @@ Warnings = {
|
|||
WARN_NNTP_NO_SERVER: _("No NNTP server was found."),
|
||||
WARN_NNTP_NO_NEWSGROUP: _("The NNTP newsgroup could not be found."),
|
||||
WARN_URL_OBFUSCATED_IP: _("The IP is obfuscated."),
|
||||
WARN_XML_PARSE_ERROR: _("XML could not be parsed."),
|
||||
}
|
||||
|
|
|
|||
|
|
@@ -59,6 +59,7 @@ def check_url(url_data, logger):
|
|||
# redirect aliases
|
||||
cache.add_result(alias, result)
|
||||
# parse content recursively
|
||||
# XXX this could add new warnings which should be cached.
|
||||
if do_parse:
|
||||
parser.parse_url(url_data)
|
||||
finally:
|
||||
|
|
|
|||
|
|
@@ -18,7 +18,8 @@
|
|||
Main functions for link parsing
|
||||
"""
|
||||
from xml.parsers.expat import ParserCreate
|
||||
|
||||
from xml.parsers.expat import ExpatError
|
||||
from ..checker.const import (WARN_XML_PARSE_ERROR)
|
||||
|
||||
class XmlTagUrlParser(object):
|
||||
"""Parse XML files and find URLs in text content of a tag name."""
|
||||
|
|
@@ -40,8 +41,10 @@ class XmlTagUrlParser(object):
|
|||
self.url = u""
|
||||
data = url_data.get_content()
|
||||
isfinal = True
|
||||
self.parser.Parse(data, isfinal)
|
||||
|
||||
try:
|
||||
self.parser.Parse(data, isfinal)
|
||||
except ExpatError as expaterr:
|
||||
self.url_data.add_warning(expaterr.message,tag=WARN_XML_PARSE_ERROR)
|
||||
def start_element(self, name, attrs):
|
||||
"""Set tag status for start element."""
|
||||
self.in_tag = (name == self.tag)
|
||||
|
|
|
|||
Loading…
Reference in a new issue