diff --git a/doc/changelog.txt b/doc/changelog.txt
index 05574ca9..214e00ee 100644
--- a/doc/changelog.txt
+++ b/doc/changelog.txt
@@ -5,6 +5,12 @@ Fixes:
Closes: SF bug #3528450
- checking: Fix writing temporary files.
+Changes:
+- checking: Ignore URLs from local PHP files with execution
+ directives of the form "<? ?>".
+ Prevents false errors when checking local PHP files.
+ Closes: SF bug #3532763
+
Features:
- installation: Support RPM building with cx_Freeze.
- installation: Added .desktop files for POSIX systems.
diff --git a/linkcheck/checker/__init__.py b/linkcheck/checker/__init__.py
index e56bdd74..8352b867 100644
--- a/linkcheck/checker/__init__.py
+++ b/linkcheck/checker/__init__.py
@@ -50,7 +50,7 @@ def absolute_url (base_url, base_ref, parent_url):
def get_url_from (base_url, recursion_level, aggregate,
parent_url=None, base_ref=None, line=0, column=0,
- name=u""):
+ name=u"", parent_content_type=None):
"""
Get url data from given base data.
@@ -86,7 +86,12 @@ def get_url_from (base_url, recursion_level, aggregate,
if not (url or name):
# use filename as base url, with slash as path seperator
name = base_url.replace("\\", "/")
- klass = get_urlclass_from(url)
+ if parent_content_type == 'application/x-httpd-php' and \
+ '<?' in base_url and '?>' in base_url and url.startswith('file:'):
+ # ignore URLs from local PHP files with execution directives
+ klass = ignoreurl.IgnoreUrl
+ else:
+ klass = get_urlclass_from(url)
return klass(base_url, recursion_level, aggregate,
parent_url=parent_url, base_ref=base_ref,
line=line, column=column, name=name)
@@ -157,4 +162,4 @@ class StoringHandler (logging.Handler):
# all the URL classes
from . import (fileurl, unknownurl, ftpurl, httpurl,
- httpsurl, mailtourl, telneturl, nntpurl)
+ httpsurl, mailtourl, telneturl, nntpurl, ignoreurl)
diff --git a/linkcheck/checker/ignoreurl.py b/linkcheck/checker/ignoreurl.py
new file mode 100644
index 00000000..4a345dd2
--- /dev/null
+++ b/linkcheck/checker/ignoreurl.py
@@ -0,0 +1,28 @@
+# -*- coding: iso-8859-1 -*-
+# Copyright (C) 2012 Bastian Kleineidam
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+"""
+Handle ignored URLs.
+"""
+
+from . import unknownurl
+
+class IgnoreUrl (unknownurl.UnknownUrl):
+ """URL class whose ignored() check unconditionally reports True."""
+
+ def ignored (self):
+ """Return True unconditionally; every URL of this class is ignored."""
+ return True
diff --git a/linkcheck/checker/urlbase.py b/linkcheck/checker/urlbase.py
index a02b2b32..0d78788d 100644
--- a/linkcheck/checker/urlbase.py
+++ b/linkcheck/checker/urlbase.py
@@ -957,7 +957,7 @@ class UrlBase (object):
base_ref = urlutil.url_norm(base)[0]
url_data = get_url_from(url, self.recursion_level+1, self.aggregate,
parent_url=self.url, base_ref=base_ref, line=line, column=column,
- name=name)
+ name=name, parent_content_type=self.content_type)
self.aggregate.urlqueue.put(url_data)
def parse_opera (self):
diff --git a/linkcheck/fileutil.py b/linkcheck/fileutil.py
index e94dfdd5..7233fad2 100644
--- a/linkcheck/fileutil.py
+++ b/linkcheck/fileutil.py
@@ -195,7 +195,7 @@ def init_mimedb():
# For Opera bookmark files (opera6.adr)
add_mimetype(mimedb, 'text/plain', '.adr')
# To recognize PHP files as HTML with content check.
- add_mimetype(mimedb, 'text/plain', '.php')
+ add_mimetype(mimedb, 'application/x-httpd-php', '.php')
def add_mimetype(mimedb, mimetype, extension):
diff --git a/tests/checker/data/file.php b/tests/checker/data/file.php
index 2e339c9e..995d47d2 100644
--- a/tests/checker/data/file.php
+++ b/tests/checker/data/file.php
@@ -1 +1,4 @@
Bla
+
+PHP 1
+PHP 2
diff --git a/tests/checker/data/file.php.result b/tests/checker/data/file.php.result
index 9f28238e..f033f56e 100644
--- a/tests/checker/data/file.php.result
+++ b/tests/checker/data/file.php.result
@@ -9,3 +9,17 @@ cache key file://%(curdir)s/%(datadir)s/anchor.html
real url file://%(curdir)s/%(datadir)s/anchor.html
name Bla
valid
+
+url test_<? echo $module ?>
+cache key file://%(curdir)s/%(datadir)s/test_%%3C?%%20echo%%20%%24module%%20?%%3E
+real url file://%(curdir)s/%(datadir)s/test_%%3C?%%20echo%%20%%24module%%20?%%3E
+name PHP 1
+warning File URL ignored.
+valid
+
+url test_<?php echo $module ?>
+cache key file://%(curdir)s/%(datadir)s/test_%%3C?php%%20echo%%20%%24module%%20?%%3E
+real url file://%(curdir)s/%(datadir)s/test_%%3C?php%%20echo%%20%%24module%%20?%%3E
+name PHP 2
+warning File URL ignored.
+valid