mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-03 12:24:46 +00:00
config to ignore certain warnings
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@2743 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
badb88ed68
commit
f80480469f
22 changed files with 224 additions and 124 deletions
|
|
@ -9,6 +9,12 @@
|
|||
Changed: linkcheck/__init__.py, doc/en/linkchecker.1,
|
||||
linkcheck/dns/{ifconfig,resolver}.py
|
||||
|
||||
* Use tags for different LinkChecker warnings and allow them to
|
||||
be filtered with a configuration file entry.
|
||||
Type: feature
|
||||
Changed: linkchecker, linkcheck/checker/*.py,
|
||||
linkcheck/configuration.py
|
||||
|
||||
3.0 "The Jacket" (released 8.7.2005)
|
||||
|
||||
* Catch all check errors, not just the ones inside of URL checking.
|
||||
|
|
|
|||
3
TODO
3
TODO
|
|
@ -1,8 +1,5 @@
|
|||
Possible improvements people could work on:
|
||||
|
||||
- [FEATURE] Add test cases specified in
|
||||
http://intertwingly.net/stories/2004/08/04/urlnorm.py
|
||||
|
||||
- [FEATURE] Assign numbers to warnings and let the user configure which to
|
||||
ignore.
|
||||
|
||||
|
|
|
|||
|
|
@ -144,6 +144,10 @@
|
|||
# value is a regular expression
|
||||
#noproxy1=*\.intra
|
||||
#noproxy2=.*myinternurl.*
|
||||
# Ignore specified warnings (see linkchecker -h for the list of
|
||||
# recognized warnings). Add a comma-separated list of warnings here
|
||||
# you want to be ignored by the output loggers.
|
||||
#ignorewarnings=url-unnormed,url-unicode-domain,url-anchor-not-found
|
||||
|
||||
# You can provide different user/password pairs for different link types.
|
||||
# Entries are a triple (link regular expression, username, password),
|
||||
|
|
|
|||
|
|
@ -20,7 +20,16 @@ window.onload = function() {
|
|||
</head>
|
||||
<body>
|
||||
<!-- bfknav -->
|
||||
Imagine a navigation
|
||||
<div class="navigation">
|
||||
<div class="navrow" style="padding: 0em 0em 0em 1em;">
|
||||
<a href="index.html">LinkChecker</a>
|
||||
<span>Installation</span>
|
||||
<a href="upgrading.html">Upgrading</a>
|
||||
<a href="documentation.html">Documentation</a>
|
||||
<a href="other.html">Other</a>
|
||||
|
||||
</div>
|
||||
</div>
|
||||
<!-- /bfknav -->
|
||||
<div class="document" id="installation">
|
||||
<h1 class="title">Installation</h1>
|
||||
|
|
|
|||
|
|
@ -23,6 +23,8 @@ import sys
|
|||
import os
|
||||
import cgi
|
||||
import socket
|
||||
import codecs
|
||||
import traceback
|
||||
import select
|
||||
import re
|
||||
import urllib
|
||||
|
|
@ -60,6 +62,43 @@ ExcList = [
|
|||
ftplib.error_proto,
|
||||
]
|
||||
|
||||
# Registry of known warning tags. The key is the tag given to
# add_warning(..., tag=...); the value is the translated description
# of the warning.
Warnings = {
    # --- generic URL warnings ---
    "url-effective-url":
        _("The effective URL is different from the original."),
    "url-unicode-domain":
        _("URL uses a unicode domain."),
    "url-unnormed":
        _("URL is not normed."),
    "url-anchor-not-found":
        _("URL anchor was not found."),
    "url-warnregex-found":
        _("The warning regular expression was found in the URL contents."),
    "url-content-too-large":
        _("The URL content is too large."),
    # --- file: URLs ---
    "file-missing-slash":
        _("The file: URL is missing a trailing slash."),
    "file-system-path":
        _("The file: path is not the same as the system specific path."),
    # --- ftp: URLs ---
    "ftp-missing-slash":
        _("The ftp: URL is missing a trailing slash."),
    # --- http: URLs ---
    "http-robots-denied":
        _("The http: URL checking has been denied."),
    "http-no-anchor-support":
        _("The HTTP server had no anchor support."),
    "http-moved-permanent":
        _("The URL has moved permanently."),
    "http-wrong-redirect":
        _("The URL has been redirected to an URL of a different type."),
    "http-empty-content":
        _("The URL had no content."),
    "http-cookie-store-error":
        _("An error occurred while storing a cookie."),
    "http-decompress-error":
        _("An error occurred while decompressing the URL content."),
    "http-unsupported-encoding":
        _("The URL content is encoded with an unknown encoding."),
    # --- ignored URLs ---
    "ignored-url":
        _("The URL has been ignored."),
    # --- mailto: URLs ---
    "mail-no-addresses":
        _("The mailto: URL contained no addresses."),
    "mail-no-mx-host":
        _("The mail MX host could not be found."),
    "mail-unverified-address":
        _("The mailto: address could not be verified."),
    "mail-no-connection":
        _("No connection to a MX host could be established."),
    # --- nntp: URLs ---
    "nntp-no-server":
        _("No NNTP server was found."),
    "nntp-no-newsgroup":
        _("The NNTP newsgroup could not be found."),
    "nntp-busy":
        _("The NNTP server was busy."),
}
|
||||
|
||||
ignored_schemes = r"""^(
|
||||
acap # application configuration access protocol
|
||||
|afs # Andrew File System global file names
|
||||
|
|
@ -99,6 +138,48 @@ acap # application configuration access protocol
|
|||
|
||||
ignored_schemes_re = re.compile(ignored_schemes, re.VERBOSE)
|
||||
|
||||
# Wrap sys.stderr in an ISO-8859-1 stream writer; the "ignore" error
# handler drops characters that cannot be encoded instead of raising.
stderr = codecs.getwriter("iso8859-1")(sys.stderr, errors="ignore")
|
||||
|
||||
def internal_error ():
    """
    Report an unexpected internal failure and terminate the program.

    Writes the bug report instructions, the type and value of the
    exception currently being handled, its traceback and the system
    info, then exits with status 1.
    """
    print >> stderr, os.linesep
    report_template = _("""********** Oops, I did it again. *************

You have found an internal error in LinkChecker. Please write a bug report
at http://sourceforge.net/tracker/?func=add&group_id=1913&atid=101913
or send mail to %s and include the following information:
- the URL or file you are testing
- your commandline arguments and/or configuration.
- the output of a debug run with option "-Dall" of the executed command
- the system information below.

Disclosing some of the information above due to privacy reasons is ok.
I will try to help you nonetheless, but you have to give me something
I can work with ;) .
""")
    print >> stderr, report_template % linkcheck.configuration.Email
    # only the exception type and value are printed here; the
    # traceback itself is emitted by traceback.print_exc() below
    exc_info = sys.exc_info()
    print >> stderr, exc_info[0], exc_info[1]
    traceback.print_exc()
    print_app_info()
    print >> stderr, os.linesep, \
        _("******** LinkChecker internal error, over and out ********")
    sys.exit(1)
|
||||
|
||||
|
||||
def print_app_info ():
    """
    Write application and system details to stderr.

    Prints the application identification, the Python version and
    platform, and each of the listed locale/proxy environment variables
    that is set.
    """
    print >> stderr, _("System info:")
    print >> stderr, linkcheck.configuration.App
    print >> stderr, _("Python %s on %s") % (sys.version, sys.platform)
    env_names = ("LC_ALL", "LC_MESSAGES", "http_proxy", "ftp_proxy")
    for name in env_names:
        setting = os.getenv(name)
        if setting is None:
            # variable is not set, nothing to report
            continue
        print >> stderr, name, "=", repr(setting)
|
||||
|
||||
|
||||
def abort (consumer):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -31,7 +31,7 @@ import linkcheck.lock
|
|||
import linkcheck.strformat
|
||||
import linkcheck.checker.geoip
|
||||
from linkcheck.decorators import synchronized
|
||||
from urlbase import stderr
|
||||
from linkcheck.checker import stderr
|
||||
|
||||
# global lock for synchronizing all the checker threads
|
||||
_lock = thread.allocate_lock()
|
||||
|
|
@ -200,8 +200,13 @@ class Consumer (object):
|
|||
"""
|
||||
Send new url to all configured loggers.
|
||||
"""
|
||||
has_warnings = False
|
||||
for tag, content in url_data.warnings:
|
||||
if tag not in self._config["ignorewarnings"]:
|
||||
has_warnings = True
|
||||
break
|
||||
do_print = self._config["verbose"] or not url_data.valid or \
|
||||
(url_data.warning and self._config["warnings"])
|
||||
(has_warnings and self._config["warnings"])
|
||||
self._config['logger'].log_filter_url(url_data, do_print)
|
||||
for log in self._config['fileoutput']:
|
||||
log.log_filter_url(url_data, do_print)
|
||||
|
|
|
|||
|
|
@ -116,7 +116,8 @@ class FileUrl (urlbase.UrlBase):
|
|||
# ignore query and fragment url parts for filesystem urls
|
||||
self.urlparts[3] = self.urlparts[4] = ''
|
||||
if self.is_directory() and not self.urlparts[2].endswith('/'):
|
||||
self.add_warning(_("Added trailing slash to directory."))
|
||||
self.add_warning(_("Added trailing slash to directory."),
|
||||
tag="file-missing-slash")
|
||||
self.urlparts[2] += '/'
|
||||
self.url = urlparse.urlunsplit(self.urlparts)
|
||||
|
||||
|
|
@ -144,7 +145,8 @@ class FileUrl (urlbase.UrlBase):
|
|||
if path != realpath:
|
||||
self.add_warning(_("The URL path %r is not the same as the " \
|
||||
"system path %r. You should always use " \
|
||||
"the system path in URLs.") % (path, realpath))
|
||||
"the system path in URLs.") % (path, realpath),
|
||||
tag="file-system-path")
|
||||
pass
|
||||
|
||||
def get_content (self):
|
||||
|
|
|
|||
|
|
@ -142,7 +142,8 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
if "%s/" % self.filename in files:
|
||||
if not self.url.endswith('/'):
|
||||
self.add_warning(
|
||||
_("Missing trailing directory slash in ftp url."))
|
||||
_("Missing trailing directory slash in ftp url."),
|
||||
tag="ftp-missing-slash")
|
||||
self.url += '/'
|
||||
return
|
||||
raise ftplib.error_perm, "550 File not found"
|
||||
|
|
|
|||
|
|
@ -134,7 +134,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
if not self.allows_robots(self.url):
|
||||
# remove all previously stored results
|
||||
self.add_warning(
|
||||
_("Access denied by robots.txt, checked only syntax."))
|
||||
_("Access denied by robots.txt, checked only syntax."),
|
||||
linkcheck.checker.WARN_HTTP_ROBOTS_DENIED)
|
||||
self.set_result(u"syntax OK")
|
||||
return
|
||||
# check for amazon server quirk
|
||||
|
|
@ -156,7 +157,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
"a GET request was used instead.") % server)
|
||||
if self.no_anchor:
|
||||
self.add_warning(_("Server %r had no anchor support, removed"\
|
||||
" anchor from request.") % server)
|
||||
" anchor from request.") % server,
|
||||
linkcheck.checker.WARN_HTTP_NO_ANCHOR_SUPPORT)
|
||||
# redirections might have changed the URL
|
||||
newurl = urlparse.urlunsplit(self.urlparts)
|
||||
if self.url != newurl:
|
||||
|
|
@ -292,7 +294,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
# check robots.txt allowance again
|
||||
if not self.allows_robots(redirected):
|
||||
self.add_warning(
|
||||
_("Access denied by robots.txt, checked only syntax."))
|
||||
_("Access denied by robots.txt, checked only syntax."),
|
||||
tag="http-robots-denied")
|
||||
self.set_result(u"syntax OK")
|
||||
return -1, response
|
||||
# see about recursive redirect
|
||||
|
|
@ -315,7 +318,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
if not self.has301status:
|
||||
self.add_warning(
|
||||
_("HTTP 301 (moved permanent) encountered: you"
|
||||
" should update this link."))
|
||||
" should update this link."),
|
||||
tag="http-moved-permanent")
|
||||
self.has301status = True
|
||||
# check cache again on the changed URL
|
||||
if self.consumer.checked_redirect(redirected, self):
|
||||
|
|
@ -325,7 +329,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
if self.urlparts[0] != "http":
|
||||
self.add_warning(
|
||||
_("HTTP redirection to non-http url encountered; "
|
||||
"the original url was %r.") % self.url)
|
||||
"the original url was %r.") % self.url,
|
||||
tag="http-wrong-redirect")
|
||||
# make new Url object
|
||||
newobj = linkcheck.checker.get_url_from(
|
||||
redirected, self.recursion_level, self.consumer,
|
||||
|
|
@ -354,7 +359,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
if response.status == 204:
|
||||
# no content
|
||||
self.add_warning(
|
||||
linkcheck.strformat.unicode_safe(response.reason))
|
||||
linkcheck.strformat.unicode_safe(response.reason),
|
||||
tag="http-empty-content")
|
||||
# store cookies for valid links
|
||||
if self.consumer.config('cookies'):
|
||||
for c in self.cookies:
|
||||
|
|
@ -366,7 +372,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
self.add_info(linkcheck.strformat.unicode_safe(h))
|
||||
except Cookie.CookieError, msg:
|
||||
self.add_warning(_("Could not store cookies: %(msg)s.") %
|
||||
{'msg': str(msg)})
|
||||
{'msg': str(msg)},
|
||||
tag="http-cookie-store-error")
|
||||
if response.status >= 200:
|
||||
self.set_result(u"%r %s" % (response.status, response.reason))
|
||||
else:
|
||||
|
|
@ -485,7 +492,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
StringIO.StringIO(self.data))
|
||||
except zlib.error, msg:
|
||||
self.add_warning(_("Decompress error %(err)s") % \
|
||||
{"err": str(msg)})
|
||||
{"err": str(msg)},
|
||||
tag="http-decompress-error")
|
||||
f = StringIO.StringIO(self.data)
|
||||
self.data = f.read()
|
||||
self.downloadtime = time.time() - t
|
||||
|
|
@ -506,7 +514,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
encoding = headers.get_content_encoding(self.headers)
|
||||
if encoding and encoding not in _supported_encodings and \
|
||||
encoding != 'identity':
|
||||
self.add_warning(_('Unsupported content encoding %r.') % encoding)
|
||||
self.add_warning(_('Unsupported content encoding %r.') % encoding,
|
||||
tag="http-unsupported-encoding")
|
||||
return False
|
||||
return True
|
||||
|
||||
|
|
@ -534,7 +543,8 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
encoding = headers.get_content_encoding(self.headers)
|
||||
if encoding and encoding not in _supported_encodings and \
|
||||
encoding != 'identity':
|
||||
self.add_warning(_('Unsupported content encoding %r.') % encoding)
|
||||
self.add_warning(_('Unsupported content encoding %r.') % encoding,
|
||||
tag="http-unsupported-encoding")
|
||||
return False
|
||||
return True
|
||||
|
||||
|
|
|
|||
|
|
@ -33,7 +33,8 @@ class IgnoredUrl (urlbase.UrlBase):
|
|||
if self.extern[0] and self.extern[1]:
|
||||
self.add_info(_("Outside of domain filter, checked only syntax."))
|
||||
else:
|
||||
self.add_warning(_("%s URL ignored.") % self.scheme.capitalize())
|
||||
# The tag must match the "ignored-url" key registered in
# linkcheck.checker.Warnings, otherwise an "ignorewarnings" config
# entry for it never matches this warning.
self.add_warning(_("%s URL ignored.") % self.scheme.capitalize(),
                 tag="ignored-url")
|
||||
|
||||
def can_get_content (self):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -133,7 +133,8 @@ class MailtoUrl (urlbase.UrlBase):
|
|||
If not, print a warning.
|
||||
"""
|
||||
if not self.addresses:
|
||||
self.add_warning(_("No addresses found."))
|
||||
self.add_warning(_("No addresses found."),
|
||||
tag="mail-no-addresses")
|
||||
return
|
||||
for name, mail in self.addresses:
|
||||
self.check_smtp_domain(name, mail)
|
||||
|
|
@ -152,7 +153,8 @@ class MailtoUrl (urlbase.UrlBase):
|
|||
answers = linkcheck.dns.resolver.query(domain, 'MX')
|
||||
if len(answers) == 0:
|
||||
self.add_warning(_("No MX mail host for %(domain)s found.") % \
|
||||
{'domain': domain})
|
||||
{'domain': domain},
|
||||
tag="mail-no-mx-host")
|
||||
answers = linkcheck.dns.resolver.query(domain, 'A')
|
||||
if len(answers) == 0:
|
||||
self.set_result(_("No host for %(domain)s found.") % \
|
||||
|
|
@ -210,11 +212,13 @@ class MailtoUrl (urlbase.UrlBase):
|
|||
self.add_info(_("Unverified address: %(info)s." \
|
||||
" But mail will be sent anyway.") % d)
|
||||
else:
|
||||
self.add_warning(_("Unverified address: %(info)s.") % d)
|
||||
self.add_warning(_("Unverified address: %(info)s.") % d,
|
||||
tag="mail-unverified-address")
|
||||
except smtplib.SMTPException, msg:
|
||||
self.add_warning(
|
||||
_("MX mail host %(host)s did not accept connections: " \
|
||||
"%(error)s.") % {'host': host, 'error': str(msg)})
|
||||
"%(error)s.") % {'host': host, 'error': str(msg)},
|
||||
tag="mail-no-connection")
|
||||
if smtpconnect:
|
||||
break
|
||||
if not smtpconnect:
|
||||
|
|
|
|||
|
|
@ -43,7 +43,8 @@ class NntpUrl (urlbase.UrlBase):
|
|||
nntpserver = self.host or self.consumer.config("nntpserver")
|
||||
if not nntpserver:
|
||||
self.add_warning(
|
||||
_("No NNTP server was specified, skipping this URL."))
|
||||
_("No NNTP server was specified, skipping this URL."),
|
||||
tag="nntp-no-server")
|
||||
return
|
||||
nntp = self._connect_nntp(nntpserver)
|
||||
group = self.urlparts[2]
|
||||
|
|
@ -62,7 +63,8 @@ class NntpUrl (urlbase.UrlBase):
|
|||
self.add_info(_("News group %s found.") % name)
|
||||
else:
|
||||
# group name is the empty string
|
||||
self.add_warning(_("No newsgroup specified in NNTP URL."))
|
||||
# The tag must match the "nntp-no-newsgroup" key registered in
# linkcheck.checker.Warnings (it was misspelled "nttp-...").
self.add_warning(_("No newsgroup specified in NNTP URL."),
                 tag="nntp-no-newsgroup")
|
||||
|
||||
def _connect_nntp (self, nntpserver):
|
||||
"""
|
||||
|
|
@ -86,7 +88,8 @@ class NntpUrl (urlbase.UrlBase):
|
|||
raise linkcheck.LinkCheckerError, \
|
||||
_("NTTP server too busy; tried more than %d times.") % tries
|
||||
if value is not None:
|
||||
self.add_warning(_("NNTP busy: %s.") % str(value))
|
||||
# The tag must match the "nntp-busy" key registered in
# linkcheck.checker.Warnings (it was misspelled "nttp-busy").
self.add_warning(_("NNTP busy: %s.") % str(value),
                 tag="nntp-busy")
|
||||
return nntp
|
||||
|
||||
def can_get_content (self):
|
||||
|
|
|
|||
|
|
@ -24,13 +24,13 @@ import urlparse
|
|||
import urllib2
|
||||
import urllib
|
||||
import time
|
||||
import traceback
|
||||
import socket
|
||||
import select
|
||||
import codecs
|
||||
import traceback
|
||||
|
||||
import linkcheck
|
||||
import linkcheck.linkparse
|
||||
import linkcheck.checker
|
||||
import linkcheck.strformat
|
||||
import linkcheck.containers
|
||||
import linkcheck.log
|
||||
|
|
@ -38,49 +38,6 @@ import linkcheck.httplib2
|
|||
import linkcheck.HtmlParser.htmlsax
|
||||
|
||||
|
||||
stderr = codecs.getwriter("iso8859-1")(sys.stderr, errors="ignore")
|
||||
|
||||
def internal_error ():
|
||||
"""
|
||||
Print internal error message to stderr.
|
||||
"""
|
||||
print >> stderr, os.linesep
|
||||
print >> stderr, _("""********** Oops, I did it again. *************
|
||||
|
||||
You have found an internal error in LinkChecker. Please write a bug report
|
||||
at http://sourceforge.net/tracker/?func=add&group_id=1913&atid=101913
|
||||
or send mail to %s and include the following information:
|
||||
- the URL or file you are testing
|
||||
- your commandline arguments and/or configuration.
|
||||
- the output of a debug run with option "-Dall" of the executed command
|
||||
- the system information below.
|
||||
|
||||
Disclosing some of the information above due to privacy reasons is ok.
|
||||
I will try to help you nonetheless, but you have to give me something
|
||||
I can work with ;) .
|
||||
""") % linkcheck.configuration.Email
|
||||
etype, value = sys.exc_info()[:2]
|
||||
print >> stderr, etype, value
|
||||
traceback.print_exc()
|
||||
print_app_info()
|
||||
print >> stderr, os.linesep, \
|
||||
_("******** LinkChecker internal error, over and out ********")
|
||||
sys.exit(1)
|
||||
|
||||
|
||||
def print_app_info ():
|
||||
"""
|
||||
Print system and application info to stderr.
|
||||
"""
|
||||
print >> stderr, _("System info:")
|
||||
print >> stderr, linkcheck.configuration.App
|
||||
print >> stderr, _("Python %s on %s") % (sys.version, sys.platform)
|
||||
for key in ("LC_ALL", "LC_MESSAGES", "http_proxy", "ftp_proxy"):
|
||||
value = os.getenv(key)
|
||||
if value is not None:
|
||||
print >> stderr, key, "=", repr(value)
|
||||
|
||||
|
||||
def urljoin (parent, url, scheme):
|
||||
"""
|
||||
If url is relative, join parent and url. Else leave url as-is.
|
||||
|
|
@ -160,7 +117,7 @@ class UrlBase (object):
|
|||
# valid or not
|
||||
self.valid = True
|
||||
# list of warnings (without duplicates)
|
||||
self.warning = linkcheck.containers.SetList()
|
||||
self.warnings = linkcheck.containers.SetList()
|
||||
# list of infos (without duplicates)
|
||||
self.info = linkcheck.containers.SetList()
|
||||
# download time
|
||||
|
|
@ -217,11 +174,11 @@ class UrlBase (object):
|
|||
"""
|
||||
return False
|
||||
|
||||
def add_warning (self, s):
|
||||
def add_warning (self, s, tag=None):
|
||||
"""
|
||||
Add a warning string.
|
||||
"""
|
||||
self.warning.append(s)
|
||||
self.warnings.append((tag, s))
|
||||
|
||||
def add_info (self, s):
|
||||
"""
|
||||
|
|
@ -234,7 +191,7 @@ class UrlBase (object):
|
|||
Fill attributes from cache data.
|
||||
"""
|
||||
self.result = cache_data["result"]
|
||||
self.warning.extend(cache_data["warning"])
|
||||
self.warnings.extend(cache_data["warnings"])
|
||||
self.info.extend(cache_data["info"])
|
||||
self.valid = cache_data["valid"]
|
||||
self.dltime = cache_data["dltime"]
|
||||
|
|
@ -246,7 +203,7 @@ class UrlBase (object):
|
|||
Return all data values that should be put in the cache.
|
||||
"""
|
||||
return {"result": self.result,
|
||||
"warning": self.warning,
|
||||
"warnings": self.warnings,
|
||||
"info": self.info,
|
||||
"valid": self.valid,
|
||||
"dltime": self.dltime,
|
||||
|
|
@ -297,7 +254,8 @@ class UrlBase (object):
|
|||
# check url warnings
|
||||
effectiveurl = urlparse.urlunsplit(self.urlparts)
|
||||
if self.url != effectiveurl:
|
||||
self.add_warning(_("Effective URL %r.") % effectiveurl)
|
||||
self.add_warning(_("Effective URL %r.") % effectiveurl,
|
||||
tag="url-effective-url")
|
||||
self.url = effectiveurl
|
||||
except linkcheck.LinkCheckerError, msg:
|
||||
self.set_result(linkcheck.strformat.unicode_safe(msg),
|
||||
|
|
@ -315,11 +273,12 @@ class UrlBase (object):
|
|||
if is_idn:
|
||||
self.add_warning(_("""URL %r has a unicode domain name which
|
||||
is not yet widely supported. You should use
|
||||
the URL %r instead.""") % (self.base_url, base_url))
|
||||
the URL %r instead.""") % (self.base_url, base_url),
|
||||
tag="url-unicode-domain")
|
||||
elif self.base_url != base_url:
|
||||
self.add_warning(
|
||||
_("Base URL is not properly normed. Normed URL is %(url)s.") % \
|
||||
{'url': base_url})
|
||||
{'url': base_url}, tag="url-unnormed")
|
||||
# make url absolute
|
||||
if self.base_ref:
|
||||
# use base reference as parent url
|
||||
|
|
@ -384,7 +343,7 @@ class UrlBase (object):
|
|||
raise
|
||||
except:
|
||||
self.consumer.interrupted(self)
|
||||
internal_error()
|
||||
linkcheck.checker.internal_error()
|
||||
|
||||
def add_country_info (self):
|
||||
"""
|
||||
|
|
@ -541,7 +500,8 @@ class UrlBase (object):
|
|||
for cur_anchor, line, column, name, base in h.urls:
|
||||
if cur_anchor == self.anchor:
|
||||
return
|
||||
self.add_warning(_("Anchor #%s not found.") % self.anchor)
|
||||
self.add_warning(_("Anchor #%s not found.") % self.anchor,
|
||||
tag="url-anchor-not-found")
|
||||
|
||||
def set_extern (self, url):
|
||||
"""
|
||||
|
|
@ -596,7 +556,8 @@ class UrlBase (object):
|
|||
return
|
||||
match = warningregex.search(self.get_content())
|
||||
if match:
|
||||
self.add_warning(_("Found %r in link contents.") % match.group())
|
||||
self.add_warning(_("Found %r in link contents.") % match.group(),
|
||||
tag="url-warnregex-found")
|
||||
|
||||
def check_size (self):
|
||||
"""
|
||||
|
|
@ -607,7 +568,8 @@ class UrlBase (object):
|
|||
if maxbytes is not None and self.dlsize >= maxbytes:
|
||||
self.add_warning(_("Content size %s is larger than %s.") % \
|
||||
(linkcheck.strformat.strsize(self.dlsize),
|
||||
linkcheck.strformat.strsize(maxbytes)))
|
||||
linkcheck.strformat.strsize(maxbytes)),
|
||||
tag="url-content-too-large")
|
||||
|
||||
def parse_url (self):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -77,6 +77,7 @@ class Configuration (dict):
|
|||
self['trace'] = False
|
||||
self["verbose"] = False
|
||||
self["warnings"] = True
|
||||
self["ignorewarnings"] = []
|
||||
self['quiet'] = False
|
||||
self["anchors"] = False
|
||||
self["anchorcaching"] = True
|
||||
|
|
@ -270,18 +271,10 @@ class Configuration (dict):
|
|||
except ConfigParser.Error, msg:
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
self[key]['parts'] = [f.strip() \
|
||||
for f in cfgparser.get(key, 'parts').split(',')]
|
||||
self[key]['parts'] = [f.strip() for f in \
|
||||
cfgparser.get(key, 'parts').split(',')]
|
||||
except ConfigParser.Error, msg:
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
logger = cfgparser.get(section, "log")
|
||||
if linkcheck.Loggers.has_key(logger):
|
||||
self['logger'] = self.logger_new(logger)
|
||||
else:
|
||||
linkcheck.log.warn(_("invalid log option %r"), logger)
|
||||
except ConfigParser.Error, msg:
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
self["warnings"] = cfgparser.getboolean(section, "warnings")
|
||||
except ConfigParser.Error, msg:
|
||||
|
|
@ -302,6 +295,14 @@ class Configuration (dict):
|
|||
self["status"] = cfgparser.getboolean(section, "status")
|
||||
except ConfigParser.Error, msg:
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
logger = cfgparser.get(section, "log")
|
||||
if linkcheck.Loggers.has_key(logger):
|
||||
self['logger'] = self.logger_new(logger)
|
||||
else:
|
||||
linkcheck.log.warn(_("invalid log option %r"), logger)
|
||||
except ConfigParser.Error, msg:
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
filelist = cfgparser.get(section, "fileoutput").split(",")
|
||||
for arg in filelist:
|
||||
|
|
@ -420,6 +421,11 @@ class Configuration (dict):
|
|||
i += 1
|
||||
except ConfigParser.Error, msg:
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
self['ignorewarnings'] = [f.strip() for f in \
|
||||
cfgparser.get(section, 'ignorewarnings').split(',')]
|
||||
except ConfigParser.Error, msg:
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
i = 1
|
||||
while 1:
|
||||
|
|
|
|||
|
|
@ -77,8 +77,8 @@ class TestLogger (linkcheck.logger.Logger):
|
|||
for info in url_data.info:
|
||||
self.result.append(u"info %s" % info)
|
||||
if self.has_part('warning'):
|
||||
for warning in url_data.warning:
|
||||
self.result.append(u"warning %s" % warning)
|
||||
for warning in url_data.warnings:
|
||||
self.result.append(u"warning %s" % warning[1])
|
||||
if self.has_part('result'):
|
||||
self.result.append(url_data.valid and u"valid" or u"error")
|
||||
# note: do not append url_data.result since this is
|
||||
|
|
|
|||
|
|
@ -199,7 +199,7 @@ class Logger (object):
|
|||
self.number += 1
|
||||
if not url_data.valid:
|
||||
self.errors += 1
|
||||
self.warnings += len(url_data.warning)
|
||||
self.warnings += len(url_data.warnings)
|
||||
if do_print:
|
||||
self.log_url(url_data)
|
||||
|
||||
|
|
|
|||
|
|
@ -102,7 +102,7 @@ class CSVLogger (linkcheck.logger.Logger):
|
|||
for s in [url_data.base_url or u"", url_data.recursion_level,
|
||||
url_data.parent_url or u"", url_data.base_ref or u"",
|
||||
url_data.result,
|
||||
os.linesep.join(url_data.warning),
|
||||
os.linesep.join([x[1] for x in url_data.warnings]),
|
||||
os.linesep.join(url_data.info),
|
||||
url_data.valid, url_data.url or u"",
|
||||
url_data.line, url_data.column,
|
||||
|
|
|
|||
|
|
@ -132,7 +132,7 @@ class HtmlLogger (linkcheck.logger.Logger):
|
|||
self.write_checktime(url_data)
|
||||
if url_data.info and self.has_part("info"):
|
||||
self.write_info(url_data)
|
||||
if url_data.warning and self.has_part("warning"):
|
||||
if url_data.warnings and self.has_part("warning"):
|
||||
self.write_warning(url_data)
|
||||
if self.has_part("result"):
|
||||
self.write_result(url_data)
|
||||
|
|
@ -249,10 +249,10 @@ class HtmlLogger (linkcheck.logger.Logger):
|
|||
|
||||
def write_warning (self, url_data):
|
||||
"""
|
||||
Write url_data.warning.
|
||||
Write url_data.warnings.
|
||||
"""
|
||||
sep = u"<br>"+os.linesep
|
||||
text = sep.join([cgi.escape(x) for x in url_data.warning])
|
||||
text = sep.join([cgi.escape(x[1]) for x in url_data.warnings])
|
||||
self.writeln(u"<tr><td bgcolor=\""+self.colorwarning+u"\" "+
|
||||
u"valign=\"top\">"+self.part("warning")+
|
||||
u"</td><td bgcolor=\""+self.colorwarning+u"\">"+
|
||||
|
|
|
|||
|
|
@ -95,6 +95,7 @@ class SQLLogger (linkcheck.logger.Logger):
|
|||
"""
|
||||
if self.fd is None:
|
||||
return
|
||||
log_warnings = [x[1] for x in url_data.warnings]
|
||||
self.writeln(u"insert into %(table)s(urlname,recursionlevel,"
|
||||
"parentname,baseref,valid,result,warning,info,url,line,col,"
|
||||
"name,checktime,dltime,dlsize,cached) values ("
|
||||
|
|
@ -122,7 +123,7 @@ class SQLLogger (linkcheck.logger.Logger):
|
|||
'base_ref': sqlify((url_data.base_ref or u"")),
|
||||
'valid': intify(url_data.valid),
|
||||
'result': sqlify(url_data.result),
|
||||
'warning': sqlify(os.linesep.join(url_data.warning)),
|
||||
'warning': sqlify(os.linesep.join(log_warnings)),
|
||||
'info': sqlify(os.linesep.join(url_data.info)),
|
||||
'url': sqlify(linkcheck.url.url_quote(url_data.url or u"")),
|
||||
'line': url_data.line,
|
||||
|
|
|
|||
|
|
@ -129,7 +129,7 @@ class TextLogger (linkcheck.logger.Logger):
|
|||
self.write_checktime(url_data)
|
||||
if url_data.info and self.has_part('info'):
|
||||
self.write_info(url_data)
|
||||
if url_data.warning and self.has_part('warning'):
|
||||
if url_data.warnings and self.has_part('warning'):
|
||||
self.write_warning(url_data)
|
||||
if self.has_part('result'):
|
||||
self.write_result(url_data)
|
||||
|
|
@ -213,7 +213,8 @@ class TextLogger (linkcheck.logger.Logger):
|
|||
Write url_data.warning.
|
||||
"""
|
||||
self.write(self.part("warning") + self.spaces("warning"))
|
||||
self.writeln(self.wrap(url_data.warning, 65), color=self.colorwarning)
|
||||
log_warnings = [x[1] for x in url_data.warnings]
|
||||
self.writeln(self.wrap(log_warnings, 65), color=self.colorwarning)
|
||||
|
||||
def write_result (self, url_data):
|
||||
"""
|
||||
|
|
|
|||
44
linkchecker
44
linkchecker
|
|
@ -70,22 +70,22 @@ Usage = _("""USAGE\tlinkchecker [options] file-or-url...
|
|||
""")
|
||||
|
||||
Notes = _("""NOTES
|
||||
o URLs on the command line starting with "ftp." are treated like
|
||||
"ftp://ftp.", URLs starting with "www." are treated like "http://www.".
|
||||
You can also give local files as arguments.
|
||||
o If you have your system configured to automatically establish a
|
||||
connection to the internet (e.g. with diald), it will connect when
|
||||
checking links not pointing to your local system.
|
||||
See the --ignore-url option on how to prevent this.
|
||||
o Javascript links are currently ignored.
|
||||
o If your platform does not support threading, LinkChecker disables it
|
||||
automatically.
|
||||
o You can supply multiple user/password pairs in a configuration file.
|
||||
o To use proxies set $http_proxy, $https_proxy, $ftp_proxy, $gopher_proxy
|
||||
on Unix or Windows.
|
||||
On a Mac use the Internet Config.
|
||||
o When checking 'news:' links the given NNTP host doesn't need to be the
|
||||
same as the host of the user browsing your pages.
|
||||
o URLs on the command line starting with "ftp." are treated like
|
||||
"ftp://ftp.", URLs starting with "www." are treated like "http://www.".
|
||||
You can also give local files as arguments.
|
||||
o If you have your system configured to automatically establish a
|
||||
connection to the internet (e.g. with diald), it will connect when
|
||||
checking links not pointing to your local system.
|
||||
See the --ignore-url option on how to prevent this.
|
||||
o Javascript links are currently ignored.
|
||||
o If your platform does not support threading, LinkChecker disables it
|
||||
automatically.
|
||||
o You can supply multiple user/password pairs in a configuration file.
|
||||
o To use proxies set $http_proxy, $https_proxy, $ftp_proxy, $gopher_proxy
|
||||
on Unix or Windows.
|
||||
On a Mac use the Internet Config.
|
||||
o When checking 'news:' links the given NNTP host doesn't need to be the
|
||||
same as the host of the user browsing your pages.
|
||||
""")
|
||||
|
||||
RegularExpressions = _("""REGULAR EXPRESSIONS
|
||||
|
|
@ -128,7 +128,7 @@ You can skip the "ftp://" url part if the domain starts with "ftp.":
|
|||
linkchecker -r0 ftp.linux.org
|
||||
""")
|
||||
|
||||
Logertypes = _(r"""OUTPUT TYPES
|
||||
LoggerTypes = _(r"""OUTPUT TYPES
|
||||
Note that by default only errors and warnings are logged.
|
||||
|
||||
text Standard text output, logging URLs in keyword: argument fashion.
|
||||
|
|
@ -150,6 +150,12 @@ blacklist
|
|||
none Logs nothing. Suitable for scripts.
|
||||
""")
|
||||
|
||||
# Help section listing every warning tag that can be used in the
# "ignorewarnings" configuration entry.
Warnings = _(r"""IGNORE WARNINGS
The following warnings are recognized in the ignorewarnings config
file entry:
""")
# Append one entry per registered tag. Sort the (tag, description) pairs
# so the help text is listed alphabetically and reproducibly instead of
# in arbitrary dictionary order.
for tag, desc in sorted(linkcheck.checker.Warnings.items()):
    Warnings += " o %s%s %s%s" % (tag, os.linesep, desc, os.linesep)
|
||||
|
||||
def encode (s, codec="iso8859-15"):
|
||||
"""
|
||||
|
|
@ -287,8 +293,8 @@ class LCOptionParser (optparse.OptionParser, object):
|
|||
"""
|
||||
Print translated help text.
|
||||
"""
|
||||
s = u"%s\n%s\n%s\n%s\n%s" % (self.format_help(), RegularExpressions,
|
||||
Notes, Retval, Examples)
|
||||
s = u"%s\n%s\n%s\n%s\n%s\n%s\n%s" % (self.format_help(),
|
||||
Examples, LoggerTypes, RegularExpressions, Notes, Retval, Warnings)
|
||||
s = s.encode("iso-8859-1", "replace")
|
||||
if os.name != 'posix':
|
||||
linkcheck.strformat.paginate(s)
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
#!/bin/sh
|
||||
# run python interpreter with current dir as search path, and remove all
|
||||
# locale and proxy settings
|
||||
env -u ftp_proxy -u http_proxy -u LANGUAGE -u LC_ALL -u LC_CTYPE LANG=C PYTHONPATH=`pwd` python2.4 $*
|
||||
PYVER=2.4
|
||||
env -u ftp_proxy -u http_proxy -u LANGUAGE -u LC_ALL -u LC_CTYPE LANG=C PYTHONPATH=`pwd` python${PYVER} $*
|
||||
|
|
|
|||
Loading…
Reference in a new issue