mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-17 02:51:07 +00:00
Ignore PHP processing instructions in local files.
This commit is contained in:
parent
f27f14dabe
commit
db95fce77e
7 changed files with 61 additions and 5 deletions
|
|
@@ -5,6 +5,12 @@ Fixes:
|
|||
Closes: SF bug #3528450
|
||||
- checking: Fix writing temporary files.
|
||||
|
||||
Changes:
|
||||
- checking: Ignore URLs from local PHP files with execution
|
||||
directives of the form "<? ?>".
|
||||
Prevents false errors when checking local PHP files.
|
||||
Closes: SF bug #3532763
|
||||
|
||||
Features:
|
||||
- installation: Support RPM building with cx_Freeze.
|
||||
- installation: Added .desktop files for POSIX systems.
|
||||
|
|
|
|||
|
|
@@ -50,7 +50,7 @@ def absolute_url (base_url, base_ref, parent_url):
|
|||
|
||||
def get_url_from (base_url, recursion_level, aggregate,
|
||||
parent_url=None, base_ref=None, line=0, column=0,
|
||||
name=u""):
|
||||
name=u"", parent_content_type=None):
|
||||
"""
|
||||
Get url data from given base data.
|
||||
|
||||
|
|
@@ -86,7 +86,12 @@ def get_url_from (base_url, recursion_level, aggregate,
|
|||
if not (url or name):
|
||||
# use filename as base url, with slash as path separator
|
||||
name = base_url.replace("\\", "/")
|
||||
klass = get_urlclass_from(url)
|
||||
if parent_content_type == 'application/x-httpd-php' and \
|
||||
'<?' in base_url and url.startswith('file:'):
|
||||
# ignore URLs from local PHP files with execution directives
|
||||
klass = ignoreurl.IgnoreUrl
|
||||
else:
|
||||
klass = get_urlclass_from(url)
|
||||
return klass(base_url, recursion_level, aggregate,
|
||||
parent_url=parent_url, base_ref=base_ref,
|
||||
line=line, column=column, name=name)
|
||||
|
|
@@ -157,4 +162,4 @@ class StoringHandler (logging.Handler):
|
|||
|
||||
# all the URL classes
|
||||
from . import (fileurl, unknownurl, ftpurl, httpurl,
|
||||
httpsurl, mailtourl, telneturl, nntpurl)
|
||||
httpsurl, mailtourl, telneturl, nntpurl, ignoreurl)
|
||||
|
|
|
|||
28
linkcheck/checker/ignoreurl.py
Normal file
28
linkcheck/checker/ignoreurl.py
Normal file
|
|
@@ -0,0 +1,28 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
"""
|
||||
Handle ignored URLs.
|
||||
"""
|
||||
|
||||
from . import unknownurl
|
||||
|
||||
class IgnoreUrl (unknownurl.UnknownUrl):
|
||||
"""Always ignored URL."""
|
||||
|
||||
def ignored (self):
|
||||
"""Return True if this URL scheme is ignored."""
|
||||
return True
|
||||
|
|
@@ -957,7 +957,7 @@ class UrlBase (object):
|
|||
base_ref = urlutil.url_norm(base)[0]
|
||||
url_data = get_url_from(url, self.recursion_level+1, self.aggregate,
|
||||
parent_url=self.url, base_ref=base_ref, line=line, column=column,
|
||||
name=name)
|
||||
name=name, parent_content_type=self.content_type)
|
||||
self.aggregate.urlqueue.put(url_data)
|
||||
|
||||
def parse_opera (self):
|
||||
|
|
|
|||
|
|
@@ -195,7 +195,7 @@ def init_mimedb():
|
|||
# For Opera bookmark files (opera6.adr)
|
||||
add_mimetype(mimedb, 'text/plain', '.adr')
|
||||
# To recognize PHP files as HTML with content check.
|
||||
add_mimetype(mimedb, 'text/plain', '.php')
|
||||
add_mimetype(mimedb, 'application/x-httpd-php', '.php')
|
||||
|
||||
|
||||
def add_mimetype(mimedb, mimetype, extension):
|
||||
|
|
|
|||
|
|
@@ -1 +1,4 @@
|
|||
<a href="anchor.html">Bla</a>
|
||||
<!-- URLs with processing instructions are ignored -->
|
||||
<a href="test_<? echo $module ?>">PHP 1</a>
|
||||
<a href="test_<?php echo $module ?>">PHP 2</a>
|
||||
|
|
|
|||
|
|
@@ -9,3 +9,17 @@ cache key file://%(curdir)s/%(datadir)s/anchor.html
|
|||
real url file://%(curdir)s/%(datadir)s/anchor.html
|
||||
name Bla
|
||||
valid
|
||||
|
||||
url test_<? echo $module ?>
|
||||
cache key file://%(curdir)s/%(datadir)s/test_%%3C?%%20echo%%20%%24module%%20?%%3E
|
||||
real url file://%(curdir)s/%(datadir)s/test_%%3C?%%20echo%%20%%24module%%20?%%3E
|
||||
name PHP 1
|
||||
warning File URL ignored.
|
||||
valid
|
||||
|
||||
url test_<?php echo $module ?>
|
||||
cache key file://%(curdir)s/%(datadir)s/test_%%3C?php%%20echo%%20%%24module%%20?%%3E
|
||||
real url file://%(curdir)s/%(datadir)s/test_%%3C?php%%20echo%%20%%24module%%20?%%3E
|
||||
name PHP 2
|
||||
warning File URL ignored.
|
||||
valid
|
||||
|
|
|
|||
Loading…
Reference in a new issue