mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-04 12:54:42 +00:00
new intern/extern handling
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@2584 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
0c2b70cf1e
commit
1a7a771648
16 changed files with 123 additions and 171 deletions
3
TODO
3
TODO
|
|
@ -1,8 +1,5 @@
|
|||
Possible improvements people could work on:
|
||||
|
||||
- [USAGE] rethink intern/extern stuff. Especially when specifying
|
||||
--extern one should not have to also specify --intern suddenly.
|
||||
|
||||
- [BUGFIX] when an URL is found in the cache and it has a broken anchor,
|
||||
the broken anchor name is not displayed as a warning
|
||||
|
||||
|
|
|
|||
|
|
@ -119,9 +119,6 @@
|
|||
# check anchors?
|
||||
#anchors=0
|
||||
#recursionlevel=1
|
||||
# overall strict checking. You can specify for each extern URL
|
||||
# separately if its strict or not. See the [filtering] section
|
||||
#strict=0
|
||||
# supply a regular expression for which warnings are printed if found
|
||||
# in any HTML files.
|
||||
#warningregex=Oracle DB Error
|
||||
|
|
@ -135,15 +132,14 @@
|
|||
# filtering options (see FAQ)
|
||||
# for each extern link we can specify if it is strict or not
|
||||
[filtering]
|
||||
# everything with 'lconline' in the URL name is extern
|
||||
# extern1=lconline 0
|
||||
# everything with 'bookmark' in the URL name is strict
|
||||
# extern2=bookmark 1
|
||||
# links to our domain are intern
|
||||
# internlinks=calvinsplayground\.de
|
||||
# check only syntax of all mail adresses
|
||||
# extern3=^mailto: 1
|
||||
#denyallow=0
|
||||
# ignore everything with 'lconline' in the URL name
|
||||
#ignore1=lconline
|
||||
# and ignore everything with 'bookmark' in the URL name
|
||||
#ignore2=bookmark
|
||||
# and ignore all mailto: URLs
|
||||
#ignore3=^mailto:
|
||||
# do not recurse into the following URLs
|
||||
#nofollow1=http://justahomepage/bla
|
||||
# specify hosts to contact directly without a proxy
|
||||
# value is a regular expression
|
||||
#noproxy1=*\.intra
|
||||
|
|
|
|||
|
|
@ -160,34 +160,6 @@ import linkcheck.checker.nntpurl
|
|||
import linkcheck.checker.errorurl
|
||||
|
||||
|
||||
def set_intern_url (url, klass, config):
|
||||
"""
|
||||
Add intern url pattern for url given on the command line.
|
||||
|
||||
@param url: URL to add
|
||||
@type url: string
|
||||
@param klass: URL class
|
||||
@type klass: class object
|
||||
@param config: configuration data
|
||||
@type config: linkcheck.configuration.Configuration
|
||||
"""
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, "Set intern url for %r", url)
|
||||
if klass == linkcheck.checker.fileurl.FileUrl:
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, "Add intern pattern ^file:")
|
||||
config['internlinks'].append(linkcheck.get_link_pat("^file:"))
|
||||
elif klass in [linkcheck.checker.httpurl.HttpUrl,
|
||||
linkcheck.checker.httpsurl.HttpsUrl,
|
||||
linkcheck.checker.ftpurl.FtpUrl]:
|
||||
domain = linkcheck.strformat.url_unicode_split(url)[1]
|
||||
domain, is_idn = linkcheck.url.idna_encode(domain)
|
||||
if domain:
|
||||
domain = "://%s" % re.escape(domain)
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, "Add intern domain %r",
|
||||
domain)
|
||||
# add scheme colon to link pattern
|
||||
config['internlinks'].append(linkcheck.get_link_pat(domain))
|
||||
|
||||
|
||||
def absolute_url (base_url, base_ref, parent_url):
|
||||
"""
|
||||
Search for the absolute url to detect the link type. This does not
|
||||
|
|
@ -212,7 +184,7 @@ def absolute_url (base_url, base_ref, parent_url):
|
|||
|
||||
def get_url_from (base_url, recursion_level, consumer,
|
||||
parent_url=None, base_ref=None, line=0, column=0,
|
||||
name=u"", cmdline=None):
|
||||
name=u"", cmdline=True):
|
||||
"""
|
||||
Get url data from given base data.
|
||||
|
||||
|
|
@ -232,8 +204,6 @@ def get_url_from (base_url, recursion_level, consumer,
|
|||
@type column: number
|
||||
@param name: link name
|
||||
@type name: string
|
||||
@param cmdline: flag if url was given on command line
|
||||
@type cmdline: bool
|
||||
"""
|
||||
base_url = linkcheck.strformat.unicode_safe(base_url)
|
||||
if parent_url is not None:
|
||||
|
|
@ -241,8 +211,6 @@ def get_url_from (base_url, recursion_level, consumer,
|
|||
if base_ref is not None:
|
||||
base_ref = linkcheck.strformat.unicode_safe(base_ref)
|
||||
name = linkcheck.strformat.unicode_safe(name)
|
||||
#if cmdline and linkcheck.url.url_needs_quoting(base_url):
|
||||
# base_url = linkcheck.url.url_quote(base_url)
|
||||
url = absolute_url(base_url, base_ref, parent_url)
|
||||
# test scheme
|
||||
if url.startswith("http:"):
|
||||
|
|
@ -272,13 +240,17 @@ def get_url_from (base_url, recursion_level, consumer,
|
|||
else:
|
||||
# error url, no further checking, just log this
|
||||
klass = linkcheck.checker.errorurl.ErrorUrl
|
||||
if cmdline and not (consumer.config['internlinks'] or
|
||||
consumer.config['externlinks']):
|
||||
# set automatic intern/extern stuff if no filter was given
|
||||
set_intern_url(url, klass, consumer.config)
|
||||
return klass(base_url, recursion_level, consumer,
|
||||
parent_url=parent_url, base_ref=base_ref,
|
||||
line=line, column=column, name=name)
|
||||
url_data = klass(base_url, recursion_level, consumer,
|
||||
parent_url=parent_url, base_ref=base_ref,
|
||||
line=line, column=column, name=name)
|
||||
if cmdline:
|
||||
# add intern URL regex to config for every URL that was given
|
||||
# on the command line
|
||||
pat = url_data.get_intern_pattern()
|
||||
linkcheck.log.debug(linkcheck.LOG_CMDLINE, "Pattern %r", pat)
|
||||
if pat:
|
||||
consumer.config['internlinks'].append(linkcheck.get_link_pat(pat))
|
||||
return url_data
|
||||
|
||||
|
||||
def get_index_html (urls):
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ def _check_morsel (m, host, path):
|
|||
class Cache (linkcheck.lock.AssertLock):
|
||||
"""
|
||||
Store and provide routines for cached data. Currently there are
|
||||
caches for cookies, checked urls, FTP connections and robots.txt
|
||||
caches for cookies, checked URLs, FTP connections and robots.txt
|
||||
contents.
|
||||
|
||||
All public operations (except __init__()) are thread-safe.
|
||||
|
|
@ -62,11 +62,11 @@ class Cache (linkcheck.lock.AssertLock):
|
|||
Initialize the default options.
|
||||
"""
|
||||
super(Cache, self).__init__()
|
||||
# already checked urls
|
||||
# already checked URLs
|
||||
self.checked = {}
|
||||
# urls that are being checked
|
||||
# URLs that are being checked
|
||||
self.in_progress = {}
|
||||
# to-be-checked urls
|
||||
# to-be-checked URLs
|
||||
self.incoming = collections.deque()
|
||||
# downloaded robots.txt files
|
||||
self.robots_txt = {}
|
||||
|
|
|
|||
|
|
@ -51,7 +51,7 @@ def print_duration (duration):
|
|||
|
||||
class Consumer (linkcheck.lock.AssertLock):
|
||||
"""
|
||||
Consume urls from the url queue in a thread-safe manner.
|
||||
Consume URLs from the URL queue in a thread-safe manner.
|
||||
"""
|
||||
|
||||
def __init__ (self, config, cache):
|
||||
|
|
|
|||
|
|
@ -255,3 +255,19 @@ class FileUrl (urlbase.UrlBase):
|
|||
if ro.search(self.get_content()[:30]):
|
||||
getattr(self, "parse_"+key)()
|
||||
return
|
||||
|
||||
def get_intern_pattern (self):
|
||||
"""
|
||||
Get pattern for intern URL matching.
|
||||
|
||||
@return non-empty regex pattern or None
|
||||
@rtype String or None
|
||||
"""
|
||||
absolute = linkcheck.checker.absolute_url
|
||||
url = absolute(self.base_url, self.base_ref, self.parent_url)
|
||||
if not url:
|
||||
return None
|
||||
parts = linkcheck.strformat.url_unicode_split(url)
|
||||
path = parts[2]
|
||||
return "file://%s" % re.escape(path)
|
||||
|
||||
|
|
|
|||
|
|
@ -27,12 +27,13 @@ import linkcheck
|
|||
import urlbase
|
||||
import proxysupport
|
||||
import httpurl
|
||||
import internpaturl
|
||||
import linkcheck.ftpparse._ftpparse as ftpparse
|
||||
|
||||
DEFAULT_TIMEOUT_SECS = 300
|
||||
|
||||
|
||||
class FtpUrl (urlbase.UrlBase, proxysupport.ProxySupport):
|
||||
class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
||||
"""
|
||||
Url link with ftp scheme.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -34,6 +34,7 @@ import linkcheck.robotparser2
|
|||
import linkcheck.httplib2
|
||||
import httpheaders as headers
|
||||
import urlbase
|
||||
import internpaturl
|
||||
import proxysupport
|
||||
|
||||
supportHttps = hasattr(linkcheck.httplib2, "HTTPSConnection") and \
|
||||
|
|
@ -45,7 +46,7 @@ _supported_encodings = ('gzip', 'x-gzip', 'deflate')
|
|||
_is_amazon = re.compile(r'^www\.amazon\.(com|de|ca|fr|co\.(uk|jp))').search
|
||||
|
||||
|
||||
class HttpUrl (urlbase.UrlBase, proxysupport.ProxySupport):
|
||||
class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
||||
"""
|
||||
Url link with http scheme.
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -22,14 +22,14 @@ import urllib
|
|||
|
||||
class ProxySupport (object):
|
||||
"""
|
||||
Get support for proxying and for urls with user:pass@host setting.
|
||||
Get support for proxying and for URLs with user:pass@host setting.
|
||||
"""
|
||||
|
||||
def set_proxy (self, proxy):
|
||||
"""
|
||||
Parse given proxy information and store parsed values.
|
||||
Note that only http:// proxies are supported, both for ftp://
|
||||
and http:// urls.
|
||||
and http:// URLs.
|
||||
"""
|
||||
self.proxy = proxy
|
||||
self.proxyauth = None
|
||||
|
|
|
|||
|
|
@ -518,54 +518,34 @@ class UrlBase (object):
|
|||
@rtype: bool
|
||||
"""
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, "extern=%s", self.extern)
|
||||
return self.extern[0] and \
|
||||
(self.consumer.config["externstrictall"] or self.extern[1])
|
||||
return self.extern[0] and self.extern[1]
|
||||
|
||||
def _get_extern (self, url):
|
||||
"""
|
||||
Match URL against intern and extern link patterns, according
|
||||
to the configured denyallow order.
|
||||
Match URL against extern and intern link patterns. If no pattern
|
||||
matches the URL is extern.
|
||||
|
||||
@return: a tuple (is_extern, is_strict)
|
||||
@rtype: tuple (bool, bool)
|
||||
"""
|
||||
if not (self.consumer.config["externlinks"] or \
|
||||
self.consumer.config["internlinks"]):
|
||||
return (0, 0)
|
||||
# deny and allow external checking
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, "Url %r", url)
|
||||
if self.consumer.config["denyallow"]:
|
||||
for entry in self.consumer.config["externlinks"]:
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, "Extern entry %r",
|
||||
entry)
|
||||
match = entry['pattern'].search(url)
|
||||
if (entry['negate'] and not match) or \
|
||||
(match and not entry['negate']):
|
||||
return (1, entry['strict'])
|
||||
for entry in self.consumer.config["internlinks"]:
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, "Intern entry %r",
|
||||
entry)
|
||||
match = entry['pattern'].search(url)
|
||||
if (entry['negate'] and not match) or \
|
||||
(match and not entry['negate']):
|
||||
return (0, 0)
|
||||
return (0, 0)
|
||||
else:
|
||||
for entry in self.consumer.config["internlinks"]:
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, "Intern entry %r",
|
||||
entry)
|
||||
match = entry['pattern'].search(url)
|
||||
if (entry['negate'] and not match) or \
|
||||
(match and not entry['negate']):
|
||||
return (0, 0)
|
||||
for entry in self.consumer.config["externlinks"]:
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, "Extern entry %r",
|
||||
entry)
|
||||
match = entry['pattern'].search(url)
|
||||
if (entry['negate'] and not match) or \
|
||||
(match and not entry['negate']):
|
||||
return (1, entry['strict'])
|
||||
return (1, 0)
|
||||
for entry in self.consumer.config["externlinks"]:
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, "Extern entry %r",
|
||||
entry)
|
||||
match = entry['pattern'].search(url)
|
||||
if (entry['negate'] and not match) or \
|
||||
(match and not entry['negate']):
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, "Extern URL %r", url)
|
||||
return (1, entry['strict'])
|
||||
for entry in self.consumer.config["internlinks"]:
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, "Intern entry %r",
|
||||
entry)
|
||||
match = entry['pattern'].search(url)
|
||||
if (entry['negate'] and not match) or \
|
||||
(match and not entry['negate']):
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, "Intern URL %r", url)
|
||||
return (0, 0)
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, "Extern URL %r", url)
|
||||
return (1, 0)
|
||||
|
||||
def can_get_content (self):
|
||||
"""
|
||||
|
|
@ -726,6 +706,15 @@ class UrlBase (object):
|
|||
u"name=%r" % self.name,
|
||||
])
|
||||
|
||||
def get_intern_pattern (self):
|
||||
"""
|
||||
Get pattern for intern URL matching.
|
||||
|
||||
@return non-empty regex pattern or None
|
||||
@rtype String or None
|
||||
"""
|
||||
return None
|
||||
|
||||
def __str__ (self):
|
||||
"""
|
||||
Get URL info.
|
||||
|
|
|
|||
|
|
@ -78,11 +78,9 @@ class Configuration (dict):
|
|||
self['quiet'] = False
|
||||
self["anchors"] = False
|
||||
self["anchorcaching"] = True
|
||||
self["externstrictall"] = False
|
||||
self["externlinks"] = []
|
||||
self["internlinks"] = []
|
||||
self["noproxyfor"] = []
|
||||
self["denyallow"] = False
|
||||
self["interactive"] = False
|
||||
# on ftp, password is set by Pythons ftplib
|
||||
self["authentication"] = []
|
||||
|
|
@ -270,8 +268,8 @@ class Configuration (dict):
|
|||
except ConfigParser.Error, msg:
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
self[key]['fields'] = [f.strip() \
|
||||
for f in cfgparser.get(key, 'fields').split(',')]
|
||||
self[key]['parts'] = [f.strip() \
|
||||
for f in cfgparser.get(key, 'parts').split(',')]
|
||||
except ConfigParser.Error, msg:
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
|
|
@ -341,11 +339,6 @@ class Configuration (dict):
|
|||
self["recursionlevel"] = num
|
||||
except ConfigParser.Error, msg:
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
self["externstrictall"] = \
|
||||
cfgparser.getboolean(section, "externstrictall")
|
||||
except ConfigParser.Error, msg:
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
arg = cfgparser.get(section, "warningregex")
|
||||
if arg:
|
||||
|
|
@ -415,13 +408,26 @@ class Configuration (dict):
|
|||
try:
|
||||
i = 1
|
||||
while 1:
|
||||
ctuple = cfgparser.get(section, "extern%d" % i).split()
|
||||
ctuple = cfgparser.get(section, "nofollow%d" % i).split()
|
||||
if len(ctuple)!=2:
|
||||
linkcheck.log.error(
|
||||
_("extern%d: syntax error %s\n") % (i, ctuple))
|
||||
_("nofollow%d: syntax error %s\n") % (i, ctuple))
|
||||
break
|
||||
self["externlinks"].append(
|
||||
linkcheck.get_link_pat(ctuple[0], strict=int(ctuple[1])))
|
||||
linkcheck.get_link_pat(ctuple[0], strict=0))
|
||||
i += 1
|
||||
except ConfigParser.Error, msg:
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
i = 1
|
||||
while 1:
|
||||
ctuple = cfgparser.get(section, "ignore%d" % i).split()
|
||||
if len(ctuple)!=2:
|
||||
linkcheck.log.error(
|
||||
_("ignore%d: syntax error %s\n") % (i, ctuple))
|
||||
break
|
||||
self["externlinks"].append(
|
||||
linkcheck.get_link_pat(ctuple[0], strict=1))
|
||||
i += 1
|
||||
except ConfigParser.Error, msg:
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
|
|
@ -430,10 +436,6 @@ class Configuration (dict):
|
|||
linkcheck.get_link_pat(cfgparser.get(section, "internlinks")))
|
||||
except ConfigParser.Error, msg:
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
try:
|
||||
self["denyallow"] = cfgparser.getboolean(section, "denyallow")
|
||||
except ConfigParser.Error, msg:
|
||||
linkcheck.log.debug(linkcheck.LOG_CHECK, msg)
|
||||
|
||||
def write_boolean_config (self, fp, boolopts):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -86,8 +86,6 @@ def checklink (out=sys.stdout, form=None, env=os.environ):
|
|||
config["recursionlevel"] = int(form["level"].value)
|
||||
config["logger"] = config.logger_new('html', fd=out)
|
||||
config["threads"] = 0
|
||||
if form.has_key('externstrictall'):
|
||||
config['externstrictall'] = True
|
||||
if form.has_key("anchors"):
|
||||
config["anchors"] = True
|
||||
if not form.has_key("errors"):
|
||||
|
|
@ -146,7 +144,7 @@ def checkform (form):
|
|||
if not _is_level(level):
|
||||
raise FormError, _("invalid recursion level")
|
||||
# check options
|
||||
for option in ("externstrictall", "anchors", "errors", "intern"):
|
||||
for option in ("anchors", "errors", "intern"):
|
||||
if form.has_key(option):
|
||||
if not form[option].value == "on":
|
||||
raise FormError, _("invalid %s option syntax") % option
|
||||
|
|
|
|||
|
|
@ -23,6 +23,9 @@ try:
|
|||
except ImportError:
|
||||
import dummy_threading as threading
|
||||
|
||||
import linkcheck
|
||||
import linkcheck.log
|
||||
|
||||
lock_klass = threading.RLock().__class__
|
||||
|
||||
class AssertLock (lock_klass):
|
||||
|
|
@ -36,6 +39,7 @@ class AssertLock (lock_klass):
|
|||
Acquire lock.
|
||||
"""
|
||||
assert not self.is_locked(), "deadlock"
|
||||
#linkcheck.log.debug(linkcheck.LOG_THREAD, "Acquire %s", self, tb=True)
|
||||
super(AssertLock, self).acquire(blocking=blocking)
|
||||
|
||||
def release (self):
|
||||
|
|
@ -43,6 +47,7 @@ class AssertLock (lock_klass):
|
|||
Release lock.
|
||||
"""
|
||||
assert self.is_locked(), "double release"
|
||||
#linkcheck.log.debug(linkcheck.LOG_THREAD, "Release %s", self, tb=True)
|
||||
super(AssertLock, self).release()
|
||||
|
||||
def is_locked (self):
|
||||
|
|
@ -50,3 +55,4 @@ class AssertLock (lock_klass):
|
|||
See if this lock is owned.
|
||||
"""
|
||||
return self._is_owned()
|
||||
|
||||
|
|
|
|||
|
|
@ -339,7 +339,7 @@ def collapse_segments (path):
|
|||
# backslashes to be left alone, and finally quoted with '%5C')
|
||||
# But replacing has several positive effects:
|
||||
# - Prevents path attacks on Windows systems (using \.. parent refs)
|
||||
# - Fixes bad urls where users used backslashes instead of slashes.
|
||||
# - Fixes bad URLs where users used backslashes instead of slashes.
|
||||
# This is a far more probable case than users having an intentional
|
||||
# backslash in the path name.
|
||||
path = path.replace('\\', '/')
|
||||
|
|
|
|||
57
linkchecker
57
linkchecker
|
|
@ -79,7 +79,7 @@ o URLs on the command line starting with "ftp." are treated like
|
|||
o If you have your system configured to automatically establish a
|
||||
connection to the internet (e.g. with diald), it will connect when
|
||||
checking links not pointing to your local system.
|
||||
See the --extern-strict-all option on how to prevent this.
|
||||
See the --ignore-url option on how to prevent this.
|
||||
o Javascript links are currently ignored.
|
||||
o If your platform does not support threading, LinkChecker disables it
|
||||
automatically.
|
||||
|
|
@ -107,7 +107,7 @@ thousands URLs. Use the -r option to restrict the recursion depth.
|
|||
|
||||
Don't connect to mailto: hosts, only check their URL syntax. All other
|
||||
links are checked as usual:
|
||||
linkchecker --intern='!^mailto:' --extern-strict-all www.mysite.org
|
||||
linkchecker --ignore-url=^mailto: www.mysite.org
|
||||
|
||||
Checking a local HTML file on Unix:
|
||||
linkchecker ../bla.html
|
||||
|
|
@ -140,7 +140,7 @@ sql Log check result as SQL script with INSERT commands. An example
|
|||
blacklist
|
||||
Suitable for cron jobs. Logs the check result into a file
|
||||
~/.linkchecker/blacklist which only contains entries with invalid
|
||||
urls and the number of times they have failed.
|
||||
URLs and the number of times they have failed.
|
||||
none Logs nothing. Suitable for scripts.
|
||||
""")
|
||||
|
||||
|
|
@ -396,29 +396,12 @@ group.add_option("-r", "--recursion-level", type="int", dest="recursionlevel",
|
|||
help=_(
|
||||
"""Check recursively all links up to given depth. A negative depth
|
||||
will enable inifinite recursion. Default depth is infinite."""))
|
||||
group.add_option("-i", "--intern", type="string", action="append",
|
||||
dest="intern", help=_(
|
||||
""" regex, --intern=regex
|
||||
Assume URLs that match the given expression as internal.
|
||||
LinkChecker descends recursively only to internal URLs, not to
|
||||
external."""))
|
||||
group.add_option("-e", "--extern", type="string", action="append",
|
||||
group.add_option("--no-follow-url", type="string", action="append",
|
||||
dest="extern", help=_(
|
||||
"""Assume urls that match the given expression as external.
|
||||
Only internal HTML links are checked recursively."""))
|
||||
group.add_option("--extern-strict", type="string", action="append",
|
||||
"""Check but do not recurse into URLs matching the given regex."""))
|
||||
group.add_option("--ignore-url", type="string", action="append",
|
||||
dest="externstrict", help=_(
|
||||
"""Assume urls that match the given expression as strict external.
|
||||
Only internal HTML links are checked recursively."""))
|
||||
group.add_option("-s", "--extern-strict-all", action="store_true",
|
||||
dest="externstrictall", help=_(
|
||||
"""Check only syntax of external links, do not try to connect to them.
|
||||
For local file urls, only local files are internal. For
|
||||
http and ftp urls, all urls at the same domain name are internal."""))
|
||||
group.add_option("-d", "--denyallow", action="store_true", dest="denyallow",
|
||||
help=_(
|
||||
"""Swap checking order to external/internal. Default checking order
|
||||
is internal/external."""))
|
||||
"""Only check syntax of URLs matching the given regex."""))
|
||||
group.add_option("-C", "--cookies", action="store_true", dest="cookies",
|
||||
help=_(
|
||||
"""Accept and send HTTP cookies according to RFC 2109. Only cookies
|
||||
|
|
@ -427,8 +410,7 @@ Sent and accepted cookies are provided as additional logging
|
|||
information."""))
|
||||
group.add_option("-a", "--anchors", action="store_true", dest="anchors",
|
||||
help=_(
|
||||
"""Check HTTP anchor references. This option applies to both internal
|
||||
and external urls. Default is don't check anchors.
|
||||
"""Check HTTP anchor references. Default is don't check anchors.
|
||||
This option implies -w because anchor errors are always warnings."""))
|
||||
group.add_option("--no-anchor-caching", action="store_false",
|
||||
dest="anchorcaching", help=_(
|
||||
|
|
@ -462,12 +444,6 @@ group.add_option("--no-proxy-for", type="string", action="append",
|
|||
going through a proxy."""))
|
||||
optparser.add_option_group(group)
|
||||
|
||||
################# deprecated options ##################
|
||||
group = optparse.OptionGroup(optparser, _("Deprecated options"))
|
||||
group.add_option("-w", "--warnings", action="store_true", dest="warnings",
|
||||
help=_("""Log warnings. This is the default."""))
|
||||
optparser.add_option_group(group)
|
||||
|
||||
################# auto completion #####################
|
||||
if has_optcomplete:
|
||||
optcomplete.autocomplete(optparser)
|
||||
|
|
@ -570,11 +546,6 @@ if options.fileoutput:
|
|||
config['fileoutput'].append(logger)
|
||||
if options.interactive is not None:
|
||||
config['interactive'] = options.interactive
|
||||
if options.intern:
|
||||
pats = [linkcheck.get_link_pat(arg) for arg in options.intern]
|
||||
config["internlinks"].extend(pats)
|
||||
if options.denyallow is not None:
|
||||
config["denyallow"] = options.denyallow
|
||||
if options.nntpserver:
|
||||
config["nntpserver"] = options.nntpserver
|
||||
if options.anchorcaching is not None:
|
||||
|
|
@ -594,8 +565,6 @@ if options.quiet is not None:
|
|||
config['logger'] = config.logger_new('none')
|
||||
if options.recursionlevel is not None:
|
||||
config["recursionlevel"] = options.recursionlevel
|
||||
if options.externstrictall is not None:
|
||||
config["externstrictall"] = options.externstrictall
|
||||
if options.status is not None:
|
||||
config['status'] = options.status
|
||||
if options.threads is not None:
|
||||
|
|
@ -646,10 +615,10 @@ if (linkcheck.logger.gml.GMLLogger in klasses or \
|
|||
if len(args) <= 0:
|
||||
if config['interactive']:
|
||||
urls = raw_input(
|
||||
_("enter one or more urls, separated by white-space\n--> "))
|
||||
_("enter one or more URLs, separated by white-space\n--> "))
|
||||
args = urls.split()
|
||||
else:
|
||||
linkcheck.log.warn(linkcheck.LOG_CMDLINE, _("no files or urls given"))
|
||||
linkcheck.log.warn(linkcheck.LOG_CMDLINE, _("no files or URLs given"))
|
||||
|
||||
# initialize the cache and the consumer model
|
||||
cache = linkcheck.checker.cache.Cache()
|
||||
|
|
@ -661,9 +630,9 @@ for url in args:
|
|||
elif url.lower().startswith("ftp."):
|
||||
url = "ftp://%s" % url
|
||||
url_data = linkcheck.checker.get_url_from(url, 0, consumer, cmdline=True)
|
||||
# add to consumer queue
|
||||
consumer.append_url(url_data)
|
||||
|
||||
############################# check the urls ################################
|
||||
############################# check the URLs ################################
|
||||
if do_profile and not has_profile:
|
||||
linkcheck.log.warn(linkcheck.LOG_CMDLINE,
|
||||
_("The `profile' Python module is not installed,"
|
||||
|
|
@ -697,7 +666,7 @@ elif options.psyco:
|
|||
except ImportError:
|
||||
# no psyco available, just ignore
|
||||
pass
|
||||
linkcheck.checker.check_urls(consumer)
|
||||
linkcheck.checker.check_urls(consumer)
|
||||
#############################################################################
|
||||
|
||||
# interactive input end
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ msgid ""
|
|||
msgstr ""
|
||||
"Project-Id-Version: PACKAGE VERSION\n"
|
||||
"Report-Msgid-Bugs-To: calvin@users.sourceforge.net\n"
|
||||
"POT-Creation-Date: 2005-05-08 22:12+0200\n"
|
||||
"POT-Creation-Date: 2005-05-09 23:59+0200\n"
|
||||
"PO-Revision-Date: YEAR-MO-DA HO:MI+ZONE\n"
|
||||
"Last-Translator: FULL NAME <EMAIL@ADDRESS>\n"
|
||||
"Language-Team: LANGUAGE <LL@li.org>\n"
|
||||
|
|
@ -618,7 +618,12 @@ msgstr ""
|
|||
|
||||
#: ../linkcheck/configuration.py:414
|
||||
#, python-format
|
||||
msgid "extern%d: syntax error %s\n"
|
||||
msgid "nofollow%d: syntax error %s\n"
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/configuration.py:427
|
||||
#, python-format
|
||||
msgid "ignore%d: syntax error %s\n"
|
||||
msgstr ""
|
||||
|
||||
#: ../linkcheck/strformat.py:180
|
||||
|
|
|
|||
Loading…
Reference in a new issue