mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-29 10:34:42 +00:00
Support WML sites.
This commit is contained in:
parent
36b1bb01e0
commit
ecef16b2c9
8 changed files with 67 additions and 22 deletions
|
|
@ -5,6 +5,8 @@ Features:
|
|||
hostname and the expiration date are checked.
|
||||
- checking: Always compare encoded anchor names.
|
||||
Closes: SF bug #3538365
|
||||
- checking: Support WML sites.
|
||||
Closes: SF bug #3553175
|
||||
- cmdline: Added Nagios plugin script.
|
||||
|
||||
Changes:
|
||||
|
|
|
|||
|
|
@ -151,10 +151,6 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
self.add_info(_("Amazon servers block HTTP HEAD requests."))
|
||||
# check the http connection
|
||||
response = self.check_http_connection()
|
||||
if self.headers and "Server" in self.headers:
|
||||
server = self.getheader('Server')
|
||||
else:
|
||||
server = _("unknown")
|
||||
# redirections might have changed the URL
|
||||
self.url = urlutil.urlunsplit(self.urlparts)
|
||||
# check response
|
||||
|
|
@ -808,6 +804,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
self.parse_swf()
|
||||
elif ctype == "application/msword":
|
||||
self.parse_word()
|
||||
elif ctype == "text/vnd.wap.wml":
|
||||
self.parse_wml()
|
||||
|
||||
def get_robots_txt_url (self):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -99,6 +99,7 @@ class UrlBase (object):
|
|||
"text/plain+opera": "opera",
|
||||
"text/plain+chromium": "chromium",
|
||||
"application/x-plist+safari": "safari",
|
||||
"text/vnd.wap.wml": "wml",
|
||||
}
|
||||
|
||||
# Set maximum file size for downloaded files in bytes.
|
||||
|
|
@ -629,9 +630,17 @@ class UrlBase (object):
|
|||
"""Store anchors for this URL. Precondition: this URL is
|
||||
an HTML resource."""
|
||||
log.debug(LOG_CHECK, "Getting HTML anchors %s", self)
|
||||
handler = linkparse.LinkFinder(self.add_anchor,
|
||||
tags={'a': [u'name'], None: [u'id']})
|
||||
self.find_links(self.add_anchor, tags=linkparse.AnchorTags)
|
||||
|
||||
def find_links (self, callback, tags=None):
|
||||
"""Parse into content and search for URLs to check.
|
||||
Found URLs are added to the URL queue.
|
||||
"""
|
||||
# construct parser object
|
||||
handler = linkparse.LinkFinder(callback, tags=tags)
|
||||
parser = htmlsax.parser(handler)
|
||||
if self.charset:
|
||||
parser.encoding = self.charset
|
||||
handler.parser = parser
|
||||
# parse
|
||||
try:
|
||||
|
|
@ -947,21 +956,7 @@ class UrlBase (object):
|
|||
Found URLs are added to the URL queue.
|
||||
"""
|
||||
log.debug(LOG_CHECK, "Parsing HTML %s", self)
|
||||
# construct parser object
|
||||
handler = linkparse.LinkFinder(self.add_url)
|
||||
parser = htmlsax.parser(handler)
|
||||
if self.charset:
|
||||
parser.encoding = self.charset
|
||||
handler.parser = parser
|
||||
# parse
|
||||
try:
|
||||
parser.feed(self.get_content())
|
||||
parser.flush()
|
||||
except linkparse.StopParse, msg:
|
||||
log.debug(LOG_CHECK, "Stopped parsing: %s", msg)
|
||||
# break cyclic dependencies
|
||||
handler.parser = None
|
||||
parser.handler = None
|
||||
self.find_links(self.add_url)
|
||||
|
||||
def add_url (self, url, line=0, column=0, name=u"", base=None):
|
||||
"""Queue URL data for checking."""
|
||||
|
|
@ -1051,6 +1046,13 @@ class UrlBase (object):
|
|||
except winutil.Error, msg:
|
||||
log.warn(LOG_CHECK, "Error parsing word file: %s", msg)
|
||||
|
||||
def parse_wml (self):
|
||||
"""Parse into WML content and search for URLs to check.
|
||||
Found URLs are added to the URL queue.
|
||||
"""
|
||||
log.debug(LOG_CHECK, "Parsing WML %s", self)
|
||||
self.find_links(self.add_url, tags=linkparse.WmlTags)
|
||||
|
||||
def get_temp_filename (self):
|
||||
"""Get temporary filename for content to parse."""
|
||||
# store content in temporary file
|
||||
|
|
|
|||
|
|
@ -67,6 +67,20 @@ LinkTags = {
|
|||
None: [u'style'],
|
||||
}
|
||||
|
||||
# HTML anchor tags
|
||||
AnchorTags = {
|
||||
'a': [u'name'],
|
||||
None: [u'id'],
|
||||
}
|
||||
|
||||
# WML tags
|
||||
WmlTags = {
|
||||
'a': [u'href'],
|
||||
'go': [u'href'],
|
||||
'img': [u'src'],
|
||||
}
|
||||
|
||||
|
||||
# matcher for <meta http-equiv=refresh> tags
|
||||
refresh_re = re.compile(ur"(?i)^\d+;\s*url=(?P<url>.+)$")
|
||||
_quoted_pat = ur"('[^']+'|\"[^\"]+\"|[^\)\s]+)"
|
||||
|
|
|
|||
9
tests/checker/data/file.wml
Normal file
9
tests/checker/data/file.wml
Normal file
|
|
@ -0,0 +1,9 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE wml PUBLIC "-//WAPFORUM//DTD WML 1.1//EN" "http://www.wapforum.org/DTD/wml_1.1.xml"><wml>
|
||||
<card id="main" title="Imadoofus">
|
||||
<p>
|
||||
<a href="file.html">Test1</a>
|
||||
<img src="error.gif"/>
|
||||
</p>
|
||||
</card>
|
||||
</wml>
|
||||
16
tests/checker/data/file.wml.result
Normal file
16
tests/checker/data/file.wml.result
Normal file
|
|
@ -0,0 +1,16 @@
|
|||
url file://%(curdir)s/%(datadir)s/file.wml
|
||||
cache key file://%(curdir)s/%(datadir)s/file.wml
|
||||
real url file://%(curdir)s/%(datadir)s/file.wml
|
||||
name %(datadir)s/file.wml
|
||||
valid
|
||||
|
||||
url file.html
|
||||
cache key file://%(curdir)s/%(datadir)s/file.html
|
||||
real url file://%(curdir)s/%(datadir)s/file.html
|
||||
name Test1
|
||||
valid
|
||||
|
||||
url error.gif
|
||||
cache key file://%(curdir)s/%(datadir)s/error.gif
|
||||
real url file://%(curdir)s/%(datadir)s/error.gif
|
||||
error
|
||||
|
|
@ -1,5 +1,5 @@
|
|||
# -*- coding: utf-8 -*-
|
||||
# Copyright (C) 2004-2011 Bastian Kleineidam
|
||||
# Copyright (C) 2004-2012 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
|
@ -47,6 +47,9 @@ class TestFile (LinkCheckTest):
|
|||
def test_html (self):
|
||||
self.file_test("file.html")
|
||||
|
||||
def test_wml (self):
|
||||
self.file_test("file.wml")
|
||||
|
||||
def test_text (self):
|
||||
self.file_test("file.txt")
|
||||
|
||||
|
|
|
|||
|
|
@ -47,3 +47,4 @@ class TestFileutil (unittest.TestCase):
|
|||
self.mime_test(filename, "application/x-plist+safari")
|
||||
filename = os.path.join("plist_xml", "Bookmarks.plist")
|
||||
self.mime_test(filename, "application/x-plist+safari")
|
||||
self.mime_test("test.wml", "text/vnd.wap.wml")
|
||||
|
|
|
|||
Loading…
Reference in a new issue