Support WML sites.

This commit is contained in:
Bastian Kleineidam 2012-08-22 22:43:14 +02:00
parent 36b1bb01e0
commit ecef16b2c9
8 changed files with 67 additions and 22 deletions

View file

@ -5,6 +5,8 @@ Features:
hostname and the expiration date are checked.
- checking: Always compare encoded anchor names.
Closes: SF bug #3538365
- checking: Support WML sites.
Closes: SF bug #3553175
- cmdline: Added Nagios plugin script.
Changes:

View file

@ -151,10 +151,6 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
self.add_info(_("Amazon servers block HTTP HEAD requests."))
# check the http connection
response = self.check_http_connection()
if self.headers and "Server" in self.headers:
server = self.getheader('Server')
else:
server = _("unknown")
# redirections might have changed the URL
self.url = urlutil.urlunsplit(self.urlparts)
# check response
@ -808,6 +804,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
self.parse_swf()
elif ctype == "application/msword":
self.parse_word()
elif ctype == "text/vnd.wap.wml":
self.parse_wml()
def get_robots_txt_url (self):
"""

View file

@ -99,6 +99,7 @@ class UrlBase (object):
"text/plain+opera": "opera",
"text/plain+chromium": "chromium",
"application/x-plist+safari": "safari",
"text/vnd.wap.wml": "wml",
}
# Set maximum file size for downloaded files in bytes.
@ -629,9 +630,17 @@ class UrlBase (object):
"""Store anchors for this URL. Precondition: this URL is
an HTML resource."""
log.debug(LOG_CHECK, "Getting HTML anchors %s", self)
handler = linkparse.LinkFinder(self.add_anchor,
tags={'a': [u'name'], None: [u'id']})
self.find_links(self.add_anchor, tags=linkparse.AnchorTags)
def find_links (self, callback, tags=None):
"""Parse into content and search for URLs to check.
Found URLs are added to the URL queue.
"""
# construct parser object
handler = linkparse.LinkFinder(callback, tags=tags)
parser = htmlsax.parser(handler)
if self.charset:
parser.encoding = self.charset
handler.parser = parser
# parse
try:
@ -947,21 +956,7 @@ class UrlBase (object):
Found URLs are added to the URL queue.
"""
log.debug(LOG_CHECK, "Parsing HTML %s", self)
# construct parser object
handler = linkparse.LinkFinder(self.add_url)
parser = htmlsax.parser(handler)
if self.charset:
parser.encoding = self.charset
handler.parser = parser
# parse
try:
parser.feed(self.get_content())
parser.flush()
except linkparse.StopParse, msg:
log.debug(LOG_CHECK, "Stopped parsing: %s", msg)
# break cyclic dependencies
handler.parser = None
parser.handler = None
self.find_links(self.add_url)
def add_url (self, url, line=0, column=0, name=u"", base=None):
"""Queue URL data for checking."""
@ -1051,6 +1046,13 @@ class UrlBase (object):
except winutil.Error, msg:
log.warn(LOG_CHECK, "Error parsing word file: %s", msg)
def parse_wml (self):
"""Parse the WML content of this URL and search it for URLs to check.
Found URLs are added to the URL queue.
"""
log.debug(LOG_CHECK, "Parsing WML %s", self)
# Same link-finding routine as for HTML, but restricted to the WML
# tag table; each URL found is handed to add_url.
self.find_links(self.add_url, tags=linkparse.WmlTags)
def get_temp_filename (self):
"""Get temporary filename for content to parse."""
# store content in temporary file

View file

@ -67,6 +67,20 @@ LinkTags = {
None: [u'style'],
}
# HTML anchor tags: tag name -> list of attribute names looked at when
# collecting the anchors of an HTML page.
# NOTE(review): the None key presumably means "any tag" -- confirm
# against the LinkFinder implementation.
AnchorTags = {
'a': [u'name'],
None: [u'id'],
}
# WML tags: tag name -> list of attribute names that are searched for
# URLs when parsing WML (text/vnd.wap.wml) content.
WmlTags = {
'a': [u'href'],
'go': [u'href'],
'img': [u'src'],
}
# matcher for <meta http-equiv=refresh> tags
refresh_re = re.compile(ur"(?i)^\d+;\s*url=(?P<url>.+)$")
_quoted_pat = ur"('[^']+'|\"[^\"]+\"|[^\)\s]+)"

View file

@ -0,0 +1,9 @@
<?xml version="1.0" encoding="UTF-8"?>
<!DOCTYPE wml PUBLIC "-//WAPFORUM//DTD WML 1.1//EN" "http://www.wapforum.org/DTD/wml_1.1.xml"><wml>
<card id="main" title="Imadoofus">
<p>
<a href="file.html">Test1</a>
<img src="error.gif"/>
</p>
</card>
</wml>

View file

@ -0,0 +1,16 @@
url file://%(curdir)s/%(datadir)s/file.wml
cache key file://%(curdir)s/%(datadir)s/file.wml
real url file://%(curdir)s/%(datadir)s/file.wml
name %(datadir)s/file.wml
valid
url file.html
cache key file://%(curdir)s/%(datadir)s/file.html
real url file://%(curdir)s/%(datadir)s/file.html
name Test1
valid
url error.gif
cache key file://%(curdir)s/%(datadir)s/error.gif
real url file://%(curdir)s/%(datadir)s/error.gif
error

View file

@ -1,5 +1,5 @@
# -*- coding: utf-8 -*-
# Copyright (C) 2004-2011 Bastian Kleineidam
# Copyright (C) 2004-2012 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
@ -47,6 +47,9 @@ class TestFile (LinkCheckTest):
def test_html (self):
# Run the shared file_test helper on the "file.html" fixture.
self.file_test("file.html")
def test_wml (self):
# Run the shared file_test helper on the "file.wml" fixture.
self.file_test("file.wml")
def test_text (self):
self.file_test("file.txt")

View file

@ -47,3 +47,4 @@ class TestFileutil (unittest.TestCase):
self.mime_test(filename, "application/x-plist+safari")
filename = os.path.join("plist_xml", "Bookmarks.plist")
self.mime_test(filename, "application/x-plist+safari")
self.mime_test("test.wml", "text/vnd.wap.wml")