Added XML filter example.

This commit is contained in:
Bastian Kleineidam 2011-02-09 18:07:40 +01:00
parent a24ced8b9a
commit 2f1d06e4d9
3 changed files with 51 additions and 1 deletions

View file

@ -50,7 +50,7 @@ include doc/po4a.conf doc/*.po doc/*.pot
include doc/en/*.1 doc/en/*.5
include doc/de/*.1 doc/de/*.5
include doc/Makefile
include doc/examples/*.sh doc/examples/*.bat
include doc/examples/*.sh doc/examples/*.bat doc/examples/*.py
include doc/examples/linkcheckerrc_loginurl
include linkcheck/dns/changelog.txt
recursive-include tests *.py *.result *.html *.ico *.txt *.zip *.asc *.css *.xhtml *.sqlite *.adr *.swf

View file

@ -2,6 +2,7 @@ doc/examples/check_blacklist.sh
doc/examples/check_for_x_errors.sh
doc/examples/check_urls.sh
doc/examples/linkcheckerrc_loginurl
doc/examples/filter_xml_output.py
doc/robots.txt.example
cgi-bin/lc.fcgi
config/create.sql

View file

@ -0,0 +1,49 @@
#!/usr/bin/python
# -*- coding: iso-8859-1 -*-
# Copyright (C) 2011 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
Example to filter XML output.
Call with XML output filename as first argument.
Prints filtered result on standard output.
"""
import sys
from xml.etree.ElementTree import parse
def main(args):
    """Filter the XML file named by the first argument and print the result.

    args is the list of command line arguments without the program name;
    args[0] is the name of the XML output file to read. The filtered
    document is written to standard output encoded as UTF-8.

    Raises IndexError when args is empty.
    """
    filename = args[0]
    # Read raw bytes so the XML parser decodes the document itself rather
    # than relying on the locale's default text encoding.
    with open(filename, 'rb') as fd:
        tree = parse(fd)
    filter_tree(tree)
    # Serializing with an explicit encoding produces bytes, which the
    # text-mode sys.stdout of Python 3 rejects with a TypeError. Use the
    # underlying binary buffer when there is one; the Python 2 sys.stdout
    # has no such attribute and accepts bytes directly.
    out = getattr(sys.stdout, 'buffer', sys.stdout)
    tree.write(out, encoding='utf-8')
def filter_tree(tree):
    """Drop every 'urldata' entry that reports a 401 error.

    An entry is dropped when it has a 'valid' child whose text is '0' and
    whose 'result' attribute starts with '401'. Only 'urldata' elements
    directly below the root are considered. The tree is modified in place
    and nothing is returned.
    """
    def is_401_error(entry):
        status = entry.find('valid')
        if status is None or status.text != '0':
            return False
        return status.attrib.get('result', '').startswith('401')

    # Collect the matches first so the root is not mutated while searching.
    doomed = [entry for entry in tree.findall('urldata') if is_401_error(entry)]
    top = tree.getroot()
    for entry in doomed:
        top.remove(entry)
# Script entry point: when this file is executed directly, pass the command
# line arguments (without the program name) to main().
if __name__ == '__main__':
    script_args = sys.argv[1:]
    main(script_args)