2011-05-20 19:10:31 +00:00
|
|
|
# Sample configuration file; see the linkcheckerrc(5) man page or
|
2011-02-17 18:59:02 +00:00
|
|
|
# execute linkchecker -h for help on these options.
|
2016-01-23 12:28:15 +00:00
|
|
|
# Commandline options override these settings.
|
2000-02-26 10:24:46 +00:00
|
|
|
|
2011-04-02 08:38:41 +00:00
|
|
|
##################### output configuration ##########################
|
2000-02-26 10:24:46 +00:00
|
|
|
[output]
|
2022-09-22 18:24:55 +00:00
|
|
|
# enable debug messages; see 'linkchecker -h' for valid debug names, example:
|
2004-08-16 19:21:36 +00:00
|
|
|
#debug=all
|
2004-01-03 13:27:47 +00:00
|
|
|
# print status output
|
|
|
|
|
#status=1
|
2002-02-14 15:33:53 +00:00
|
|
|
# change the logging type
|
2022-09-22 18:24:55 +00:00
|
|
|
#log=text
|
2000-07-15 14:14:16 +00:00
|
|
|
# turn on/off --verbose
|
2022-09-22 18:24:55 +00:00
|
|
|
#verbose=0
|
2000-07-15 14:14:16 +00:00
|
|
|
# turn on/off --warnings
|
2022-09-22 18:24:55 +00:00
|
|
|
#warnings=1
|
2000-07-15 14:14:16 +00:00
|
|
|
# turn on/off --quiet
|
2022-09-22 18:24:55 +00:00
|
|
|
#quiet=0
|
|
|
|
|
# additional file output, example:
|
2004-08-25 21:32:59 +00:00
|
|
|
#fileoutput = text, html, gml, sql
|
2022-07-21 15:32:27 +00:00
|
|
|
# errors to ignore (URL regular expression, message regular expression)
|
|
|
|
|
#ignoreerrors=
|
|
|
|
|
# ignore all errors for broken.example.com:
|
|
|
|
|
# ^https?://broken.example.com/
|
|
|
|
|
# ignore SSL errors for dev.example.com:
|
|
|
|
|
# ^https://dev.example.com/ ^SSLError .*
|
2001-02-20 09:32:36 +00:00
|
|
|
|
2011-04-02 08:38:41 +00:00
|
|
|
|
2005-05-08 20:07:30 +00:00
|
|
|
##################### logger configuration ##########################
|
|
|
|
|
# logger output part names:
|
2010-12-15 12:24:31 +00:00
|
|
|
# all For all parts
|
|
|
|
|
# realurl The full url link
|
|
|
|
|
# result Valid or invalid, with messages
|
|
|
|
|
# extern 1 or 0, only in some logger types reported
|
|
|
|
|
# base <base href=...>
|
|
|
|
|
# name <a href=...>name</a> and <img alt="name">
|
|
|
|
|
# parenturl The referrer URL if there is any
|
|
|
|
|
# info Some additional info, e.g. FTP welcome messages
|
|
|
|
|
# warning Warnings
|
|
|
|
|
# dltime Download time
|
|
|
|
|
# checktime Check time
|
|
|
|
|
# url The original url name, can be relative
|
|
|
|
|
# intro The blurb at the beginning, "starting at ..."
|
|
|
|
|
# outro The blurb at the end, "found x errors ..."
|
|
|
|
|
# stats Statistics including URL lengths and contents.
|
2000-02-26 10:24:46 +00:00
|
|
|
|
2000-06-03 12:50:19 +00:00
|
|
|
# each Logger can have separate configuration parameters
|
2004-08-25 21:32:59 +00:00
|
|
|
|
2000-06-03 12:50:19 +00:00
|
|
|
# standard text logger
|
|
|
|
|
[text]
|
2002-02-14 15:33:53 +00:00
|
|
|
#filename=linkchecker-out.txt
|
2005-05-08 20:07:30 +00:00
|
|
|
#parts=all
|
2004-08-25 21:32:59 +00:00
|
|
|
# colors for the various parts, syntax is <color> or <type>;<color>
|
|
|
|
|
# type can be bold, light, blink, invert
|
|
|
|
|
# color can be default, black, red, green, yellow, blue, purple, cyan, white,
|
|
|
|
|
# Black, Red, Green, Yellow, Blue, Purple, Cyan, White
|
2022-09-22 18:24:55 +00:00
|
|
|
#colorparent=default
|
2004-08-25 21:32:59 +00:00
|
|
|
#colorurl=default
|
|
|
|
|
#colorname=default
|
|
|
|
|
#colorreal=cyan
|
|
|
|
|
#colorbase=purple
|
|
|
|
|
#colorvalid=bold;green
|
|
|
|
|
#colorinvalid=bold;red
|
|
|
|
|
#colorinfo=default
|
2004-09-08 12:44:31 +00:00
|
|
|
#colorwarning=bold;yellow
|
2004-08-25 21:32:59 +00:00
|
|
|
#colordltime=default
|
|
|
|
|
#colorreset=default
|
2000-06-03 12:50:19 +00:00
|
|
|
|
|
|
|
|
# GML logger
|
|
|
|
|
[gml]
|
2002-02-14 15:33:53 +00:00
|
|
|
#filename=linkchecker-out.gml
|
2005-05-08 20:07:30 +00:00
|
|
|
#parts=all
|
2010-10-25 19:42:11 +00:00
|
|
|
# valid encodings are listed in https://docs.python.org/3/library/codecs.html#standard-encodings
|
2022-09-22 18:24:55 +00:00
|
|
|
# example:
|
2004-10-27 09:25:05 +00:00
|
|
|
#encoding=utf_16
|
2000-06-03 12:50:19 +00:00
|
|
|
|
2005-01-28 11:51:12 +00:00
|
|
|
# DOT logger
|
|
|
|
|
[dot]
|
|
|
|
|
#filename=linkchecker-out.dot
|
2005-05-08 20:07:30 +00:00
|
|
|
#parts=all
|
2005-01-28 11:51:12 +00:00
|
|
|
# default encoding is ascii since the original DOT format does not
|
2022-09-22 18:24:55 +00:00
|
|
|
# support other charsets, example:
|
2005-01-28 11:51:12 +00:00
|
|
|
#encoding=iso-8859-15
|
|
|
|
|
|
2000-06-03 12:50:19 +00:00
|
|
|
# CSV logger
|
|
|
|
|
[csv]
|
2002-02-14 15:33:53 +00:00
|
|
|
#filename=linkchecker-out.csv
|
2021-11-29 19:48:50 +00:00
|
|
|
#separator=;
|
2005-01-20 22:16:01 +00:00
|
|
|
#quotechar="
|
2022-09-22 18:24:55 +00:00
|
|
|
#dialect=excel
|
2005-05-08 20:07:30 +00:00
|
|
|
#parts=all
|
2000-06-03 12:50:19 +00:00
|
|
|
|
|
|
|
|
# SQL logger
|
|
|
|
|
[sql]
|
2002-02-14 15:33:53 +00:00
|
|
|
#filename=linkchecker-out.sql
|
|
|
|
|
#dbname=linksdb
|
2005-01-20 22:16:01 +00:00
|
|
|
#separator=;
|
2005-05-08 20:07:30 +00:00
|
|
|
#parts=all
|
2000-06-03 12:50:19 +00:00
|
|
|
|
|
|
|
|
# HTML logger
|
|
|
|
|
[html]
|
2002-02-14 15:33:53 +00:00
|
|
|
#filename=linkchecker-out.html
|
2001-01-22 23:02:54 +00:00
|
|
|
# colors for the various parts
|
2002-05-14 23:01:48 +00:00
|
|
|
#colorbackground=#fff7e5
|
|
|
|
|
#colorurl=#dcd5cf
|
|
|
|
|
#colorborder=#000000
|
|
|
|
|
#colorlink=#191c83
|
2004-08-28 12:39:12 +00:00
|
|
|
#colorwarning=#e0954e
|
|
|
|
|
#colorerror=#db4930
|
|
|
|
|
#colorok=#3ba557
|
2005-05-08 20:07:30 +00:00
|
|
|
#parts=all
|
2000-06-03 12:50:19 +00:00
|
|
|
|
2020-08-23 16:19:26 +00:00
|
|
|
# failures logger
|
|
|
|
|
[failures]
|
2022-08-23 18:21:53 +00:00
|
|
|
#filename=$XDG_DATA_HOME/linkchecker/failures
|
2000-06-03 12:50:19 +00:00
|
|
|
|
2005-07-15 21:58:24 +00:00
|
|
|
# custom xml logger
|
2004-10-27 09:25:05 +00:00
|
|
|
[xml]
|
2022-09-22 18:24:55 +00:00
|
|
|
#filename=linkchecker-out.xml
|
|
|
|
|
# system encoding is used by default. Example:
|
2004-10-27 09:25:05 +00:00
|
|
|
#encoding=iso-8859-1
|
|
|
|
|
|
2005-07-15 21:58:24 +00:00
|
|
|
# GraphXML logger
|
|
|
|
|
[gxml]
|
2022-09-22 18:24:55 +00:00
|
|
|
#filename=linkchecker-out.gxml
|
|
|
|
|
# system encoding is used by default. Example:
|
2005-07-15 21:58:24 +00:00
|
|
|
#encoding=iso-8859-1
|
|
|
|
|
|
2012-09-18 10:12:00 +00:00
|
|
|
# Sitemap logger
|
|
|
|
|
[sitemap]
|
2022-09-22 18:24:55 +00:00
|
|
|
#filename=linkchecker-out.sitemap.xml
|
|
|
|
|
#encoding=utf-8
|
|
|
|
|
#priority=0.5
|
|
|
|
|
#frequency=daily
|
2012-09-18 10:12:00 +00:00
|
|
|
|
2011-04-02 08:38:41 +00:00
|
|
|
|
|
|
|
|
##################### checking options ##########################
|
2000-02-26 10:24:46 +00:00
|
|
|
[checking]
|
2000-07-15 14:14:16 +00:00
|
|
|
# number of threads
|
2015-09-15 09:15:04 +00:00
|
|
|
#threads=10
|
2006-05-15 19:04:21 +00:00
|
|
|
# connection timeout in seconds
|
|
|
|
|
#timeout=60
|
2014-02-28 23:12:34 +00:00
|
|
|
# Time to wait for checks to finish after the user aborts the first time
|
|
|
|
|
# (with Ctrl-C or the abort button).
|
|
|
|
|
#aborttimeout=300
|
|
|
|
|
# The recursion level determines how many times links inside pages are followed.
|
2022-09-22 18:24:55 +00:00
|
|
|
#recursionlevel=-1
|
2000-07-15 14:14:16 +00:00
|
|
|
# Basic NNTP server. Overrides NNTP_SERVER environment variable.
|
2002-02-14 15:33:53 +00:00
|
|
|
#nntpserver=
|
2022-09-22 18:24:55 +00:00
|
|
|
# parse a cookiefile for initial cookie data, example:
|
2012-03-22 21:29:56 +00:00
|
|
|
#cookiefile=/path/to/cookies.txt
|
2011-07-25 19:09:49 +00:00
|
|
|
# User-Agent header string to send to HTTP web servers
|
2022-09-22 18:24:55 +00:00
|
|
|
# Note that robots.txt files are always checked with the original User-Agent. Example:
|
2012-09-21 13:51:44 +00:00
|
|
|
#useragent=Mozilla/5.0 (compatible; Googlebot/2.1; +http://www.google.com/bot.html)
|
2012-06-10 11:18:35 +00:00
|
|
|
# When checking finishes, write a memory dump to a temporary file.
|
|
|
|
|
# The memory dump is written both when checking finishes normally
|
|
|
|
|
# and when checking gets canceled.
|
|
|
|
|
# The memory dump only works if the python-meliae package is installed.
|
|
|
|
|
# Otherwise a warning is printed to install it.
|
|
|
|
|
#debugmemory=0
|
2012-06-10 12:47:27 +00:00
|
|
|
# When checking absolute URLs inside local files, the given root directory
|
|
|
|
|
# is used as base URL.
|
|
|
|
|
# Note that the given directory must have URL syntax, so it must use a slash
|
|
|
|
|
# to join directories instead of a backslash.
|
|
|
|
|
# And the given directory must end with a slash.
|
|
|
|
|
# Unix example:
|
|
|
|
|
#localwebroot=/var/www/
|
|
|
|
|
# Windows example:
|
|
|
|
|
#localwebroot=/C|/public_html/
|
2013-12-12 21:17:57 +00:00
|
|
|
# Check SSL certificates. Set to an absolute pathname for a custom
|
|
|
|
|
# CA cert bundle to use. Set to zero to disable SSL certificate verification.
|
|
|
|
|
#sslverify=1
|
2012-09-03 18:17:49 +00:00
|
|
|
# Stop checking new URLs after the given number of seconds. Same as if the
|
2022-09-22 18:24:55 +00:00
|
|
|
# user hits Ctrl-C after X seconds. Example:
|
2012-09-03 18:17:49 +00:00
|
|
|
#maxrunseconds=600
|
2020-08-09 16:05:34 +00:00
|
|
|
# Don't download files larger than the given number of bytes
|
|
|
|
|
#maxfilesizedownload=5242880
|
|
|
|
|
# Don't parse files larger than the given number of bytes
|
|
|
|
|
#maxfilesizeparse=1048576
|
2012-10-14 09:13:55 +00:00
|
|
|
# Maximum number of URLs to check. New URLs will not be queued after the
|
2022-09-22 18:24:55 +00:00
|
|
|
# given number of URLs has been checked. Example:
|
2012-10-14 09:13:55 +00:00
|
|
|
#maxnumurls=153
|
2014-02-28 23:12:34 +00:00
|
|
|
# Maximum number of requests per second to one host.
|
|
|
|
|
#maxrequestspersecond=10
|
2020-08-09 16:05:34 +00:00
|
|
|
# Respect the instructions in any robots.txt files
|
|
|
|
|
#robotstxt=1
|
2022-09-22 18:24:55 +00:00
|
|
|
# Allowed URL schemes as a comma-separated list. Example:
|
2014-02-28 23:12:34 +00:00
|
|
|
#allowedschemes=http,https
|
2021-06-21 18:45:19 +00:00
|
|
|
# Size of the result cache. Checking more URLs might increase memory usage during runtime.
|
|
|
|
|
#resultcachesize=100000
|
2011-04-02 08:38:41 +00:00
|
|
|
|
|
|
|
|
##################### filtering options ##########################
|
2000-03-07 22:47:50 +00:00
|
|
|
[filtering]
|
2006-09-21 14:30:27 +00:00
|
|
|
#ignore=
|
2006-11-17 20:16:31 +00:00
|
|
|
# ignore everything with 'lconline' in the URL name
|
|
|
|
|
# lconline
|
|
|
|
|
# and ignore everything with 'bookmark' in the URL name
|
|
|
|
|
# bookmark
|
|
|
|
|
# and ignore all mailto: URLs
|
|
|
|
|
# ^mailto:
|
2005-05-09 22:05:21 +00:00
|
|
|
# do not recurse into the following URLs
|
2006-11-17 20:16:31 +00:00
|
|
|
|
2006-09-21 14:30:27 +00:00
|
|
|
#nofollow=
|
2010-10-25 19:42:11 +00:00
|
|
|
# just an example
|
|
|
|
|
# http://www\.example\.com/bla
|
2006-11-17 20:16:31 +00:00
|
|
|
|
2005-07-13 15:03:17 +00:00
|
|
|
# Ignore specified warnings (see linkchecker -h for the list of
|
|
|
|
|
# recognized warnings). Add a comma-separated list of warnings here
|
2005-10-13 21:23:13 +00:00
|
|
|
# that prevent a valid URL from being logged. Note that the warning
|
2022-09-22 18:24:55 +00:00
|
|
|
# will be logged for invalid URLs. Example:
|
2014-02-28 23:12:34 +00:00
|
|
|
#ignorewarnings=url-unicode-domain
|
2006-09-15 09:39:33 +00:00
|
|
|
# Regular expression to add more URLs recognized as internal links.
|
|
|
|
|
# Default is that URLs given on the command line are internal.
|
2010-10-25 19:42:11 +00:00
|
|
|
#internlinks=^http://www\.example\.net/
|
2014-02-28 23:12:34 +00:00
|
|
|
# Check external links
|
2022-09-22 18:24:55 +00:00
|
|
|
#checkextern=0
|
2000-03-07 22:47:50 +00:00
|
|
|
|
2011-04-02 08:38:41 +00:00
|
|
|
|
|
|
|
|
##################### password authentication ##########################
|
2010-10-14 16:36:11 +00:00
|
|
|
[authentication]
|
2012-10-15 12:36:10 +00:00
|
|
|
# WARNING: if you store passwords in this configuration entry, make sure the
|
|
|
|
|
# configuration file is not readable by other users.
|
2010-10-25 20:41:03 +00:00
|
|
|
# Different user/password pairs for different URLs can be provided.
|
2010-10-25 19:42:11 +00:00
|
|
|
# Entries are a triple (URL regular expression, username, password),
|
2000-03-07 22:47:50 +00:00
|
|
|
# separated by whitespace.
|
|
|
|
|
# If the regular expression matches, the given user/password pair is used
|
|
|
|
|
# for authentication. The commandline options -u,-p match every link
|
|
|
|
|
# and therefore override the entries given here. The first match wins.
|
2010-10-25 19:42:11 +00:00
|
|
|
# At the moment, authentication is used for http[s] and ftp links.
|
2006-09-21 14:30:27 +00:00
|
|
|
#entry=
|
2010-10-25 20:41:03 +00:00
|
|
|
# Note that passwords are optional. If any passwords are stored here,
|
|
|
|
|
# this file should not be readable by other users.
|
|
|
|
|
# ^https?://www\.example\.com/~calvin/ calvin mypass
|
2010-10-25 19:42:11 +00:00
|
|
|
# ^ftp://www\.example\.com/secret/ calvin
|
2010-10-14 16:36:11 +00:00
|
|
|
|
2020-06-23 16:28:31 +00:00
|
|
|
# If the website requires a login via a page with an HTML form, the URL of the
|
|
|
|
|
# page and optionally the username and password input element name attributes
|
|
|
|
|
# can be provided.
|
2010-10-14 16:36:11 +00:00
|
|
|
#loginurl=http://www.example.com/
|
|
|
|
|
|
2020-06-23 16:28:31 +00:00
|
|
|
# The name attributes of the username and password HTML input elements
|
2010-10-14 16:36:11 +00:00
|
|
|
#loginuserfield=login
|
|
|
|
|
#loginpasswordfield=password
|
2020-06-23 16:28:31 +00:00
|
|
|
# Optionally the name attributes of any additional input elements and the values
|
|
|
|
|
# to populate them with. Note that these are submitted without checking
|
2022-09-22 18:24:55 +00:00
|
|
|
# whether matching input elements exist in the HTML form. Example:
|
2010-10-14 16:36:11 +00:00
|
|
|
#loginextrafields=
|
|
|
|
|
# name1:value1
|
|
|
|
|
# name 2:value 2
|
2014-02-28 23:12:34 +00:00
|
|
|
|
|
|
|
|
############################ Plugins ###################################
|
|
|
|
|
#
|
|
|
|
|
# uncomment sections to enable plugins
|
|
|
|
|
|
|
|
|
|
# Check HTML anchors
|
|
|
|
|
#[AnchorCheck]
|
|
|
|
|
|
2014-03-12 18:28:37 +00:00
|
|
|
# Print HTTP header info
|
|
|
|
|
#[HttpHeaderInfo]
|
|
|
|
|
# Comma separated list of header prefixes to print.
|
|
|
|
|
# The names are case insensitive.
|
|
|
|
|
# The default list is empty, so it should be non-empty when activating
|
2022-09-22 18:24:55 +00:00
|
|
|
# this plugin. Example:
|
2014-03-12 18:28:37 +00:00
|
|
|
#prefixes=Server,X-
|
|
|
|
|
|
2014-02-28 23:12:34 +00:00
|
|
|
# Add country info to URLs
|
|
|
|
|
#[LocationInfo]
|
|
|
|
|
|
|
|
|
|
# Run W3C syntax checks
|
|
|
|
|
#[CssSyntaxCheck]
|
|
|
|
|
#[HtmlSyntaxCheck]
|
|
|
|
|
|
|
|
|
|
# Search for regular expression in page contents
|
|
|
|
|
#[RegexCheck]
|
2022-09-22 18:24:55 +00:00
|
|
|
# Example:
|
2014-02-28 23:12:34 +00:00
|
|
|
#warningregex=Oracle Error
|
|
|
|
|
|
|
|
|
|
# Search for viruses in page contents
|
|
|
|
|
#[VirusCheck]
|
2022-09-22 18:24:55 +00:00
|
|
|
#clamavconf=/etc/clamav/clamd.conf
|
2014-02-28 23:12:34 +00:00
|
|
|
|
2022-09-22 18:24:55 +00:00
|
|
|
# Check that SSL certificates have at least the given number of days validity.
|
2014-02-28 23:12:34 +00:00
|
|
|
#[SslCertificateCheck]
|
2022-09-22 18:24:55 +00:00
|
|
|
#sslcertwarndays=30
|
2014-02-28 23:12:34 +00:00
|
|
|
|
2014-04-28 16:13:45 +00:00
|
|
|
# Parse and check links in PDF files
|
|
|
|
|
#[PdfParser]
|
|
|
|
|
|
|
|
|
|
# Parse and check links in Word files
|
|
|
|
|
#[WordParser]
|
|
|
|
|
|
2014-11-11 13:35:18 +00:00
|
|
|
# Parse and check links in Markdown files.
|
|
|
|
|
# Supported links are:
|
|
|
|
|
# <http://autolink.com>
|
|
|
|
|
# [name](http://link.com "Optional title")
|
|
|
|
|
# [id]: http://link.com "Optional title"
|
|
|
|
|
#[MarkdownCheck]
|
|
|
|
|
# Regexp of filename
|
2022-09-22 18:24:55 +00:00
|
|
|
#filename_re=.*\.(markdown|md(own)?|mkdn?)$
|