mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-21 06:41:00 +00:00
Support itms-services: URLs. #532
This commit is contained in:
parent
37d4ed6f83
commit
ee4545399d
4 changed files with 64 additions and 2 deletions
|
|
@ -1,5 +1,9 @@
|
|||
9.4 "" (released xx.xx.xxxx)
|
||||
|
||||
Features:
|
||||
- checking: Support itms-services: URLs.
|
||||
Closes: GH bug #532
|
||||
|
||||
|
||||
9.3 "Better Living Through Chemistry" (released 16.7.2014)
|
||||
|
||||
|
|
|
|||
|
|
@ -143,6 +143,8 @@ def get_urlclass_from (scheme, assume_local_file=False):
|
|||
klass = nntpurl.NntpUrl
|
||||
elif scheme == "dns":
|
||||
klass = dnsurl.DnsUrl
|
||||
elif scheme == "itms-services":
|
||||
klass = itmsservicesurl.ItmsServicesUrl
|
||||
elif scheme and unknownurl.is_unknown_scheme(scheme):
|
||||
klass = unknownurl.UnknownUrl
|
||||
elif assume_local_file:
|
||||
|
|
@ -174,4 +176,4 @@ def get_index_html (urls):
|
|||
|
||||
# all the URL classes
|
||||
from . import (fileurl, unknownurl, ftpurl, httpurl, dnsurl,
|
||||
mailtourl, telneturl, nntpurl, ignoreurl)
|
||||
mailtourl, telneturl, nntpurl, ignoreurl, itmsservicesurl)
|
||||
|
|
|
|||
45
linkcheck/checker/itmsservicesurl.py
Normal file
45
linkcheck/checker/itmsservicesurl.py
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2014 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
"""
|
||||
Handle itms-services URLs.
|
||||
"""
|
||||
|
||||
from . import urlbase
|
||||
from .. import log, LOG_CHECK
|
||||
|
||||
|
||||
class ItmsServicesUrl(urlbase.UrlBase):
    """URL class for Apple iOS application download (itms-services:) links."""

    def check_syntax(self):
        """Run the inherited syntax check, then require a url parameter.

        After the base class check, the query component (urlparts[3]) is
        searched for the text "url=".  When it is absent the result is set
        to invalid with a "Missing required url parameter" message.
        """
        super(ItmsServicesUrl, self).check_syntax()
        query = self.urlparts[3]
        has_url_param = u"url=" in query
        if not has_url_param:
            self.set_result(_("Missing required url parameter"), valid=False)

    def local_check(self):
        """Only emit a debug log entry for this URL; nothing else is done."""
        log.debug(LOG_CHECK, "Checking %s", unicode(self))

    def check_content(self):
        """Return True unconditionally.

        Per the original author's note, this allows recursion so that the
        URL given in the url CGI parameter gets checked.
        """
        return True

    def is_parseable(self):
        """Report this URL as parseable (always True)."""
        return True
|
||||
|
|
@ -17,7 +17,7 @@
|
|||
"""
|
||||
Main functions for link parsing
|
||||
"""
|
||||
from .. import log, LOG_CHECK, strformat
|
||||
from .. import log, LOG_CHECK, strformat, url as urlutil
|
||||
from ..htmlutil import linkparse
|
||||
from ..HtmlParser import htmlsax
|
||||
from ..bookmarks import firefox
|
||||
|
|
@ -30,6 +30,8 @@ def parse_url(url_data):
|
|||
key = "html"
|
||||
elif url_data.is_file() and firefox.has_sqlite and firefox.extension.search(url_data.url):
|
||||
key = "firefox"
|
||||
elif url_data.scheme == "itms-services":
|
||||
key = "itms_services"
|
||||
else:
|
||||
# determine parse routine according to content types
|
||||
mime = url_data.content_type
|
||||
|
|
@ -140,4 +142,13 @@ def parse_firefox (url_data):
|
|||
url_data.add_url(url, name=name)
|
||||
|
||||
|
||||
def parse_itms_services(url_data):
    """Add the value of the first "url" CGI parameter as a child URL.

    The query component (url_data.urlparts[3]) is split into
    (key, value, separator) triples, keeping blank values.  Only the first
    parameter named "url" is passed to url_data.add_url(); later ones are
    ignored.  Nothing is added when no such parameter exists.
    """
    params = urlutil.parse_qsl(url_data.urlparts[3], keep_blank_values=True)
    for name, value, _separator in params:
        if name != "url":
            continue
        url_data.add_url(value)
        # Only the first "url" parameter is of interest.
        return
|
||||
|
||||
|
||||
from .sitemap import parse_sitemap, parse_sitemapindex
|
||||
|
|
|
|||
Loading…
Reference in a new issue