mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-18 03:21:07 +00:00
772 lines
22 KiB
Groff
772 lines
22 KiB
Groff
.\" Man page generated from reStructuredText.
|
|
.
|
|
.
|
|
.nr rst2man-indent-level 0
|
|
.
|
|
.de1 rstReportMargin
|
|
\\$1 \\n[an-margin]
|
|
level \\n[rst2man-indent-level]
|
|
level margin: \\n[rst2man-indent\\n[rst2man-indent-level]]
|
|
-
|
|
\\n[rst2man-indent0]
|
|
\\n[rst2man-indent1]
|
|
\\n[rst2man-indent2]
|
|
..
|
|
.de1 INDENT
|
|
.\" .rstReportMargin pre:
|
|
. RS \\$1
|
|
. nr rst2man-indent\\n[rst2man-indent-level] \\n[an-margin]
|
|
. nr rst2man-indent-level +1
|
|
.\" .rstReportMargin post:
|
|
..
|
|
.de UNINDENT
|
|
. RE
|
|
.\" indent \\n[an-margin]
|
|
.\" old: \\n[rst2man-indent\\n[rst2man-indent-level]]
|
|
.nr rst2man-indent-level -1
|
|
.\" new: \\n[rst2man-indent\\n[rst2man-indent-level]]
|
|
.in \\n[rst2man-indent\\n[rst2man-indent-level]]u
|
|
..
|
|
.TH "LINKCHECKERRC" "5" "August 27, 2024" "10.4.0.post49+g7cf5037e" "LinkChecker"
|
|
.SH NAME
|
|
linkcheckerrc \- configuration file for LinkChecker
|
|
.SH DESCRIPTION
|
|
.sp
|
|
\fBlinkcheckerrc\fP is the configuration file for LinkChecker. The file is
|
|
written in an INI\-style format.
|
|
The default file location is \fB$XDG_CONFIG_HOME/linkchecker/linkcheckerrc\fP
|
|
or else \fB~/.config/linkchecker/linkcheckerrc\fP on Unix,
|
|
\fB%HOMEPATH%\e.config\elinkchecker\elinkcheckerrc\fP on Windows systems.
|
|
.SH SETTINGS
|
|
.SS checking
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBcookiefile=\fP\fIfilename\fP
|
|
Read a file with initial cookie data. The cookie data format is
|
|
explained in \fB\fI\%linkchecker(1)\fP\fP\&.
|
|
Command line option: \fI\%\-\-cookiefile\fP
|
|
.TP
|
|
\fBdebugmemory=\fP[\fB0\fP|\fB1\fP]
|
|
Write memory allocation statistics to a file on exit, requires \X'tty: link https://pypi.org/project/meliae/'\fI\%meliae\fP\X'tty: link'\&.
|
|
The default is not to write the file.
|
|
Command line option: none
|
|
.TP
|
|
\fBlocalwebroot=\fP\fISTRING\fP
|
|
When checking absolute URLs inside local files, the given root
|
|
directory is used as base URL.
|
|
Note that the given directory must have URL syntax, so it must use a
|
|
slash to join directories instead of a backslash. And the given
|
|
directory must end with a slash.
|
|
Command line option: none
|
|
.TP
|
|
\fBrecursionlevel=\fP\fINUMBER\fP
|
|
Check recursively all links up to given depth. A negative depth will
|
|
enable infinite recursion. Default depth is infinite.
|
|
Command line option: \fI\%\-\-recursion\-level\fP
|
|
.TP
|
|
\fBthreads=\fP\fINUMBER\fP
|
|
Generate no more than the given number of threads. Default number of
|
|
threads is 10. To disable threading specify a non\-positive number.
|
|
Command line option: \fI\%\-\-threads\fP
|
|
.TP
|
|
\fBtimeout=\fP\fINUMBER\fP
|
|
Set the timeout for connection attempts in seconds. The default
|
|
timeout is 60 seconds.
|
|
Command line option: \fI\%\-\-timeout\fP
|
|
.TP
|
|
\fBaborttimeout=\fP\fINUMBER\fP
|
|
Time to wait for checks to finish after the user aborts the first
|
|
time (with Ctrl\-C or the abort button). The default abort timeout is
|
|
300 seconds.
|
|
Command line option: none
|
|
.TP
|
|
\fBuseragent=\fP\fISTRING\fP
|
|
Specify the User\-Agent string to send to the HTTP server, for
|
|
example \(dqMozilla/4.0\(dq. The default is \(dqLinkChecker/X.Y\(dq where X.Y is
|
|
the current version of LinkChecker.
|
|
Command line option: \fI\%\-\-user\-agent\fP
|
|
.TP
|
|
\fBsslverify=\fP[\fB0\fP|\fB1\fP|\fIfilename\fP]
|
|
If set to zero disables SSL certificate checking. If set to one (the
|
|
default) enables SSL certificate checking with the provided CA
|
|
certificate file. If a filename is specified, it will be used as the
|
|
certificate file.
|
|
Command line option: none
|
|
.TP
|
|
\fBmaxrunseconds=\fP\fINUMBER\fP
|
|
Stop checking new URLs after the given number of seconds. Same as if
|
|
the user stops (by hitting Ctrl\-C) after the given number of
|
|
seconds.
|
|
The default is not to stop until all URLs are checked.
|
|
Command line option: none
|
|
.TP
|
|
\fBmaxfilesizedownload=\fP\fINUMBER\fP
|
|
Files larger than NUMBER bytes will be ignored, without downloading anything
|
|
if accessed over http and an accurate Content\-Length header was returned.
|
|
No more than this amount of a file will be downloaded.
|
|
The default is 5242880 (5 MB).
|
|
Command line option: none
|
|
.TP
|
|
\fBmaxfilesizeparse=\fP\fINUMBER\fP
|
|
Files larger than NUMBER bytes will not be parsed for links.
|
|
The default is 1048576 (1 MB).
|
|
Command line option: none
|
|
.TP
|
|
\fBmaxnumurls=\fP\fINUMBER\fP
|
|
Maximum number of URLs to check. New URLs will not be queued after
|
|
the given number of URLs is checked.
|
|
The default is to queue and check all URLs.
|
|
Command line option: none
|
|
.TP
|
|
\fBmaxrequestspersecond=\fP\fINUMBER\fP
|
|
Limit the maximum number of HTTP requests per second to one host.
|
|
The average number of requests per second is approximately one third of the
|
|
maximum. Values less than 1 and at least 0.001 can be used.
|
|
To use values greater than 10, the HTTP server must return a
|
|
\fBLinkChecker\fP response header.
|
|
The default is 10.
|
|
Command line option: none
|
|
.TP
|
|
\fBrobotstxt=\fP[\fB0\fP|\fB1\fP]
|
|
When using http, fetch robots.txt, and confirm whether each URL should
|
|
be accessed before checking.
|
|
The default is to use robots.txt files.
|
|
Command line option: \fI\%\-\-no\-robots\fP
|
|
.TP
|
|
\fBallowedschemes=\fP\fINAME\fP[\fB,\fP\fINAME\fP\&...]
|
|
Allowed URL schemes as comma\-separated list.
|
|
Command line option: none
|
|
.TP
|
|
\fBresultcachesize=\fP\fINUMBER\fP
|
|
Set the result cache size.
|
|
The default is 100 000 URLs.
|
|
Command line option: none
|
|
.UNINDENT
|
|
.SS filtering
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBignore=\fP\fIREGEX\fP (\fI\%MULTILINE\fP)
|
|
Only check syntax of URLs matching the given regular expressions.
|
|
Command line option: \fI\%\-\-ignore\-url\fP
|
|
.TP
|
|
\fBignorewarnings=\fP\fINAME\fP[\fB,\fP\fINAME\fP\&...]
|
|
Ignore the comma\-separated list of warnings. See \fI\%WARNINGS\fP for
|
|
the list of supported warnings. Messages are logged as information.
|
|
Command line option: none
|
|
.TP
|
|
\fBignorewarningsforurls=\fP\fIURL_REGEX\fP [\fINAME_REGEX\fP] (\fI\%MULTILINE\fP)
|
|
Specify regular expressions to ignore warnings for matching URLs, one
|
|
per line.
|
|
On each line, you can specify a second regular expression,
|
|
ensuring that only the warnings with names matching the second
|
|
expression will be ignored for that URL.
|
|
If the second expression is omitted, all warnings are ignored for
|
|
that URL.
|
|
.sp
|
|
Default is to not ignore any warnings.
|
|
See \fI\%WARNINGS\fP for the list of supported warnings.
|
|
Messages are logged as information.
|
|
Command line option: none
|
|
.sp
|
|
Example:
|
|
.UNINDENT
|
|
.INDENT 0.0
|
|
.INDENT 3.5
|
|
.sp
|
|
.nf
|
|
.ft C
|
|
[filtering]
|
|
ignorewarningsforurls=
|
|
^https://redirected\e.example\e.com ^http\-redirected
|
|
.ft P
|
|
.fi
|
|
.UNINDENT
|
|
.UNINDENT
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBinternlinks=\fP\fIREGEX\fP
|
|
Regular expression to add more URLs recognized as internal links.
|
|
Default is that URLs given on the command line are internal.
|
|
Command line option: none
|
|
.TP
|
|
\fBnofollow=\fP\fIREGEX\fP (\fI\%MULTILINE\fP)
|
|
Check but do not recurse into URLs matching the given regular
|
|
expressions.
|
|
Command line option: \fI\%\-\-no\-follow\-url\fP
|
|
.TP
|
|
\fBcheckextern=\fP[\fB0\fP|\fB1\fP]
|
|
Check external links. Default is to check internal links only.
|
|
Command line option: \fI\%\-\-check\-extern\fP
|
|
.UNINDENT
|
|
.SS authentication
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBentry=\fP\fIREGEX\fP \fIUSER\fP [\fIPASS\fP] (\fI\%MULTILINE\fP)
|
|
Provide individual username/password pairs for different links. In
|
|
addition to a single login page specified with \fBloginurl\fP multiple
|
|
FTP and HTTP (Basic Authentication) links are supported.
|
|
Entries are a triple (URL regex, username, password) or a tuple (URL
|
|
regex, username), where the entries are separated by whitespace.
|
|
The password is optional and if missing it has to be entered at the
|
|
command line.
|
|
If the regular expression matches the checked URL, the given
|
|
username/password pair is used for authentication. The command line
|
|
options \fI\%\-u\fP and \fI\%\-p\fP match every link and therefore override
|
|
the entries given here. The first match wins.
|
|
Command line option: \fI\%\-u\fP, \fI\%\-p\fP
|
|
.TP
|
|
\fBloginurl=\fP\fIURL\fP
|
|
The URL of a login page to be visited before link checking. The page
|
|
is expected to contain an HTML form to collect credentials and
|
|
submit them to the address in its action attribute using an HTTP
|
|
POST request. The name attributes of the input elements of the form
|
|
and the values to be submitted need to be available (see \fBentry\fP
|
|
for an explanation of username and password values).
|
|
.TP
|
|
\fBloginuserfield=\fP\fISTRING\fP
|
|
The name attribute of the username input element. Default: \fBlogin\fP\&.
|
|
.TP
|
|
\fBloginpasswordfield=\fP\fISTRING\fP
|
|
The name attribute of the password input element. Default: \fBpassword\fP\&.
|
|
.TP
|
|
\fBloginextrafields=\fP\fINAME\fP\fB:\fP\fIVALUE\fP (\fI\%MULTILINE\fP)
|
|
Optionally the name attributes of any additional input elements and
|
|
the values to populate them with. Note that these are submitted
|
|
without checking whether matching input elements exist in the HTML
|
|
form.
|
|
.UNINDENT
|
|
.SS output
|
|
.SS URL checking results
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBfileoutput=\fP\fITYPE\fP[\fB,\fP\fITYPE\fP\&...]
|
|
Output to a file \fBlinkchecker\-out.\fP\fITYPE\fP, or
|
|
\fB$XDG_DATA_HOME/linkchecker/failures\fP for the \fBfailures\fP output type.
|
|
Valid file output types are \fBtext\fP, \fBhtml\fP, \fBsql\fP, \fBcsv\fP,
|
|
\fBgml\fP, \fBdot\fP, \fBxml\fP, \fBnone\fP or \fBfailures\fP\&. Default is no
|
|
file output. The various output types are documented below. Note
|
|
that you can suppress all console output with \fBoutput=none\fP\&.
|
|
Command line option: \fI\%\-\-file\-output\fP
|
|
.TP
|
|
\fBlog=\fP\fITYPE\fP[\fB/\fP\fIENCODING\fP]
|
|
Specify the console output type as \fBtext\fP, \fBhtml\fP, \fBsql\fP, \fBcsv\fP,
|
|
\fBgml\fP, \fBdot\fP, \fBxml\fP, \fBnone\fP or \fBfailures\fP\&. Default type
|
|
is \fBtext\fP\&. The various output types are documented below.
|
|
The \fIENCODING\fP specifies the output encoding, the default is that of
|
|
your locale. Valid encodings are listed at
|
|
\X'tty: link https://docs.python.org/library/codecs.html#standard-encodings'\fI\%https://docs.python.org/library/codecs.html#standard\-encodings\fP\X'tty: link'\&.
|
|
Command line option: \fI\%\-\-output\fP
|
|
.TP
|
|
\fBverbose=\fP[\fB0\fP|\fB1\fP]
|
|
If set, log all checked URLs once, overriding \fBwarnings\fP\&.
|
|
Default is to log only errors and warnings.
|
|
Command line option: \fI\%\-\-verbose\fP
|
|
.TP
|
|
\fBwarnings=\fP[\fB0\fP|\fB1\fP]
|
|
If set, log warnings. Default is to log warnings.
|
|
Command line option: \fI\%\-\-no\-warnings\fP
|
|
.TP
|
|
\fBignoreerrors=\fP\fIURL_REGEX\fP [\fIMESSAGE_REGEX\fP] (\fI\%MULTILINE\fP)
|
|
Specify regular expressions to ignore errors for matching URLs, one
|
|
per line. A second regular expression can be specified per line to
|
|
only ignore matching error messages per corresponding URL. If the
|
|
second expression is omitted, all errors are ignored. In contrast
|
|
to \fI\%filtering\fP, this happens \fIafter\fP checking, which allows checking
|
|
URLs despite certain expected and tolerable errors. Default is to
|
|
not ignore any errors. Example:
|
|
.UNINDENT
|
|
.INDENT 0.0
|
|
.INDENT 3.5
|
|
.sp
|
|
.nf
|
|
.ft C
|
|
[output]
|
|
ignoreerrors=
|
|
^https://deprecated\e.example\e.com ^410 Gone
|
|
# ignore all errors (no second expression), also for syntax check:
|
|
^mailto:.*@example\e.com$
|
|
.ft P
|
|
.fi
|
|
.UNINDENT
|
|
.UNINDENT
|
|
.SS Progress updates
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBstatus=\fP[\fB0\fP|\fB1\fP]
|
|
Control printing URL checker status messages. Default is 1.
|
|
Command line option: \fI\%\-\-no\-status\fP
|
|
.UNINDENT
|
|
.SS Application
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBdebug=\fP\fISTRING\fP[\fB,\fP\fISTRING\fP\&...]
|
|
Print debugging output for the given logger. Available debug
|
|
loggers are \fBcmdline\fP, \fBchecking\fP, \fBcache\fP, \fBplugin\fP and \fBall\fP\&.
|
|
\fBall\fP is an alias for all available loggers.
|
|
Command line option: \fI\%\-\-debug\fP
|
|
.UNINDENT
|
|
.SS Quiet
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBquiet=\fP[\fB0\fP|\fB1\fP]
|
|
If set, operate quietly. An alias for \fBlog=none\fP that also hides
|
|
application information messages.
|
|
This is only useful with \fBfileoutput\fP, else no results will be output.
|
|
Command line option: \fI\%\-\-quiet\fP
|
|
.UNINDENT
|
|
.SH OUTPUT TYPES
|
|
.SS text
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBfilename=\fP\fISTRING\fP
|
|
Specify output filename for text logging. Default filename is
|
|
\fBlinkchecker\-out.txt\fP\&.
|
|
Command line option: \fI\%\-\-file\-output\fP
|
|
.TP
|
|
\fBparts=\fP\fISTRING\fP
|
|
Comma\-separated list of parts that have to be logged. See \fI\%LOGGER PARTS\fP
|
|
below.
|
|
Command line option: none
|
|
.TP
|
|
\fBencoding=\fP\fISTRING\fP
|
|
Valid encodings are listed in
|
|
\X'tty: link https://docs.python.org/library/codecs.html#standard-encodings'\fI\%https://docs.python.org/library/codecs.html#standard\-encodings\fP\X'tty: link'\&.
|
|
Default encoding is the system default locale encoding.
|
|
.TP
|
|
\fBwraplength=\fP\fINUMBER\fP
|
|
The number of characters at which to wrap each message line.
|
|
The default is 65.
|
|
Command line option: none
|
|
.TP
|
|
.B \fIcolor*\fP
|
|
Color settings for the various log parts, syntax is \fIcolor\fP or
|
|
\fItype\fP\fB;\fP\fIcolor\fP\&. The \fItype\fP can be \fBbold\fP, \fBlight\fP,
|
|
\fBblink\fP or \fBinvert\fP\&. The \fIcolor\fP can be \fBdefault\fP, \fBblack\fP,
|
|
\fBred\fP, \fBgreen\fP, \fByellow\fP, \fBblue\fP, \fBpurple\fP, \fBcyan\fP,
|
|
\fBwhite\fP, \fBBlack\fP, \fBRed\fP, \fBGreen\fP, \fBYellow\fP, \fBBlue\fP,
|
|
\fBPurple\fP, \fBCyan\fP or \fBWhite\fP\&.
|
|
Command line option: none
|
|
.TP
|
|
\fBcolorparent=\fP\fISTRING\fP
|
|
Set parent color. Default is \fBwhite\fP\&.
|
|
.TP
|
|
\fBcolorurl=\fP\fISTRING\fP
|
|
Set URL color. Default is \fBdefault\fP\&.
|
|
.TP
|
|
\fBcolorname=\fP\fISTRING\fP
|
|
Set name color. Default is \fBdefault\fP\&.
|
|
.TP
|
|
\fBcolorreal=\fP\fISTRING\fP
|
|
Set real URL color. Default is \fBcyan\fP\&.
|
|
.TP
|
|
\fBcolorbase=\fP\fISTRING\fP
|
|
Set base URL color. Default is \fBpurple\fP\&.
|
|
.TP
|
|
\fBcolorvalid=\fP\fISTRING\fP
|
|
Set valid color. Default is \fBbold;green\fP\&.
|
|
.TP
|
|
\fBcolorinvalid=\fP\fISTRING\fP
|
|
Set invalid color. Default is \fBbold;red\fP\&.
|
|
.TP
|
|
\fBcolorinfo=\fP\fISTRING\fP
|
|
Set info color. Default is \fBdefault\fP\&.
|
|
.TP
|
|
\fBcolorwarning=\fP\fISTRING\fP
|
|
Set warning color. Default is \fBbold;yellow\fP\&.
|
|
.TP
|
|
\fBcolordltime=\fP\fISTRING\fP
|
|
Set download time color. Default is \fBdefault\fP\&.
|
|
.TP
|
|
\fBcolorreset=\fP\fISTRING\fP
|
|
Set reset color. Default is \fBdefault\fP\&.
|
|
.UNINDENT
|
|
.SS gml
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBfilename=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.TP
|
|
\fBparts=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.TP
|
|
\fBencoding=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.UNINDENT
|
|
.SS dot
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBfilename=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.TP
|
|
\fBparts=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.TP
|
|
\fBencoding=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.UNINDENT
|
|
.SS csv
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBfilename=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.TP
|
|
\fBparts=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.TP
|
|
\fBencoding=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.TP
|
|
\fBseparator=\fP\fICHAR\fP
|
|
Set CSV separator. Default is a semicolon (\fB;\fP).
|
|
.TP
|
|
\fBquotechar=\fP\fICHAR\fP
|
|
Set CSV quote character. Default is a double quote (\fB\(dq\fP).
|
|
.TP
|
|
\fBdialect=\fP\fISTRING\fP
|
|
Controls the output formatting.
|
|
See \X'tty: link https://docs.python.org/3/library/csv.html#csv.Dialect'\fI\%https://docs.python.org/3/library/csv.html#csv.Dialect\fP\X'tty: link'\&.
|
|
Default is \fBexcel\fP\&.
|
|
.UNINDENT
|
|
.SS sql
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBfilename=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.TP
|
|
\fBparts=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.TP
|
|
\fBencoding=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.TP
|
|
\fBdbname=\fP\fISTRING\fP
|
|
Set database name to store into. Default is \fBlinksdb\fP\&.
|
|
.TP
|
|
\fBseparator=\fP\fICHAR\fP
|
|
Set SQL command separator character. Default is a semicolon (\fB;\fP).
|
|
.UNINDENT
|
|
.SS html
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBfilename=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.TP
|
|
\fBparts=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.TP
|
|
\fBencoding=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.TP
|
|
\fBcolorbackground=\fP\fICOLOR\fP
|
|
Set HTML background color. Default is \fB#fff7e5\fP\&.
|
|
.TP
|
|
\fBcolorurl=\fP\fICOLOR\fP
|
|
Set HTML URL color. Default is \fB#dcd5cf\fP\&.
|
|
.TP
|
|
\fBcolorborder=\fP\fICOLOR\fP
|
|
Set HTML border color. Default is \fB#000000\fP\&.
|
|
.TP
|
|
\fBcolorlink=\fP\fICOLOR\fP
|
|
Set HTML link color. Default is \fB#191c83\fP\&.
|
|
.TP
|
|
\fBcolorwarning=\fP\fICOLOR\fP
|
|
Set HTML warning color. Default is \fB#e0954e\fP\&.
|
|
.TP
|
|
\fBcolorerror=\fP\fICOLOR\fP
|
|
Set HTML error color. Default is \fB#db4930\fP\&.
|
|
.TP
|
|
\fBcolorok=\fP\fICOLOR\fP
|
|
Set HTML valid color. Default is \fB#3ba557\fP\&.
|
|
.UNINDENT
|
|
.SS failures
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBfilename=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.TP
|
|
\fBencoding=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.UNINDENT
|
|
.SS xml
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBfilename=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.TP
|
|
\fBparts=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.TP
|
|
\fBencoding=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.UNINDENT
|
|
.SS gxml
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBfilename=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.TP
|
|
\fBparts=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.TP
|
|
\fBencoding=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.UNINDENT
|
|
.SS sitemap
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBfilename=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.TP
|
|
\fBparts=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.TP
|
|
\fBencoding=\fP\fISTRING\fP
|
|
See \fI\%[text]\fP section above.
|
|
.TP
|
|
\fBpriority=\fP\fIFLOAT\fP
|
|
A number between 0.0 and 1.0 determining the priority. The default
|
|
priority for the first URL is 1.0, for all child URLs 0.5.
|
|
.TP
|
|
\fBfrequency=\fP[\fBalways\fP|\fBhourly\fP|\fBdaily\fP|\fBweekly\fP|\fBmonthly\fP|\fByearly\fP|\fBnever\fP]
|
|
How frequently pages are changing. Default is \fBdaily\fP\&.
|
|
.UNINDENT
|
|
.SH LOGGER PARTS
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBall\fP
|
|
for all parts
|
|
.TP
|
|
\fBid\fP
|
|
a unique ID for each log entry
|
|
.TP
|
|
\fBrealurl\fP
|
|
the full url link
|
|
.TP
|
|
\fBresult\fP
|
|
valid or invalid, with messages
|
|
.TP
|
|
\fBextern\fP
|
|
1 or 0, only reported in some logger types
|
|
.TP
|
|
\fBbase\fP
|
|
base href=...
|
|
.TP
|
|
\fBname\fP
|
|
<a href=...>name</a> and <img alt=\(dqname\(dq>
|
|
.TP
|
|
\fBparenturl\fP
|
|
if any
|
|
.TP
|
|
\fBinfo\fP
|
|
some additional info, e.g. FTP welcome messages
|
|
.TP
|
|
\fBwarning\fP
|
|
warnings
|
|
.TP
|
|
\fBdltime\fP
|
|
download time
|
|
.TP
|
|
\fBchecktime\fP
|
|
check time
|
|
.TP
|
|
\fBurl\fP
|
|
the original url name, can be relative
|
|
.TP
|
|
\fBintro\fP
|
|
the blurb at the beginning, \(dqstarting at ...\(dq
|
|
.TP
|
|
\fBoutro\fP
|
|
the blurb at the end, \(dqfound x errors ...\(dq
|
|
.UNINDENT
|
|
.SH MULTILINE
|
|
.sp
|
|
Some option values can span multiple lines. Each line has to be indented
|
|
for that to work. Lines starting with a hash (\fB#\fP) will be ignored,
|
|
though they must still be indented.
|
|
.INDENT 0.0
|
|
.INDENT 3.5
|
|
.sp
|
|
.nf
|
|
.ft C
|
|
ignore=
|
|
lconline
|
|
bookmark
|
|
# a comment
|
|
^mailto:
|
|
.ft P
|
|
.fi
|
|
.UNINDENT
|
|
.UNINDENT
|
|
.SH EXAMPLE
|
|
.INDENT 0.0
|
|
.INDENT 3.5
|
|
.sp
|
|
.nf
|
|
.ft C
|
|
[output]
|
|
log=html
|
|
|
|
[checking]
|
|
threads=5
|
|
|
|
[filtering]
|
|
ignorewarnings=http\-moved\-permanent
|
|
.ft P
|
|
.fi
|
|
.UNINDENT
|
|
.UNINDENT
|
|
.SH PLUGINS
|
|
.sp
|
|
All plugins have a separate section. If the section appears in the
|
|
configuration file the plugin is enabled. Some plugins read extra
|
|
options in their section.
|
|
.SS AnchorCheck
|
|
.sp
|
|
Checks validity of HTML anchors. When checking local files, URLs with anchors
|
|
that link to directories e.g. \(dqexample/#anchor\(dq are not supported. There is no
|
|
such limitation when using http(s).
|
|
.SS LocationInfo
|
|
.sp
|
|
Adds the country and if possible city name of the URL host as info.
|
|
Needs GeoIP or pygeoip and a local country or city lookup DB installed.
|
|
.SS RegexCheck
|
|
.sp
|
|
Define a regular expression which prints a warning if it matches any
|
|
content of the checked link. This applies only to valid pages, so we can
|
|
get their content.
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBwarningregex=\fP\fIREGEX\fP
|
|
Use this to check for pages that contain some form of error message,
|
|
for example \(dqThis page has moved\(dq or \(dqOracle Application error\(dq.
|
|
\fIREGEX\fP should be unquoted.
|
|
.sp
|
|
Note that multiple values can be combined in the regular expression,
|
|
for example \(dq(This page has moved|Oracle Application error)\(dq.
|
|
.UNINDENT
|
|
.SS SslCertificateCheck
|
|
.sp
|
|
Check SSL certificate expiration date. Only internal https: links will
|
|
be checked. A domain will only be checked once to avoid duplicate
|
|
warnings.
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBsslcertwarndays=\fP\fINUMBER\fP
|
|
Configures the expiration warning time in days.
|
|
.UNINDENT
|
|
.SS HtmlSyntaxCheck
|
|
.sp
|
|
Check the syntax of HTML pages by submitting their URLs to the online W3C HTML
|
|
validator. If a page URL is not accessible to the validator no check is
|
|
performed and no warning given.
|
|
See \X'tty: link https://validator.w3.org/docs/api.html'\fI\%https://validator.w3.org/docs/api.html\fP\X'tty: link'\&.
|
|
.sp
|
|
\fBNOTE:\fP
|
|
.INDENT 0.0
|
|
.INDENT 3.5
|
|
The HtmlSyntaxCheck plugin is currently broken and is disabled.
|
|
.UNINDENT
|
|
.UNINDENT
|
|
.SS HttpHeaderInfo
|
|
.sp
|
|
Print HTTP headers in URL info.
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBprefixes=\fP\fIprefix1\fP[\fB,\fP\fIprefix2\fP\&...]
|
|
List of comma\-separated header prefixes. For example, to display all
|
|
HTTP headers that start with \(dqX\-\(dq, use \fBprefixes=X\-\fP\&.
|
|
.UNINDENT
|
|
.SS CssSyntaxCheck
|
|
.sp
|
|
Check the syntax of CSS stylesheets by submitting their URLs to the online W3C CSS
|
|
validator. If a stylesheet URL is not accessible to the validator no check is
|
|
performed and no warning given.
|
|
See \X'tty: link https://jigsaw.w3.org/css-validator/manual.html#expert'\fI\%https://jigsaw.w3.org/css\-validator/manual.html#expert\fP\X'tty: link'\&.
|
|
.SS VirusCheck
|
|
.sp
|
|
Checks the page content for virus infections with clamav. A local clamav
|
|
daemon must be installed.
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBclamavconf=\fP\fIfilename\fP
|
|
Filename of \fBclamd.conf\fP config file.
|
|
.UNINDENT
|
|
.SS PdfParser
|
|
.sp
|
|
Parse PDF files for URLs to check. Needs the \X'tty: link https://pypi.org/project/pdfminer.six/'\fI\%pdfminer.six\fP\X'tty: link' Python package
|
|
installed.
|
|
.SS WordParser
|
|
.sp
|
|
Parse Word files for URLs to check. Needs the \X'tty: link https://pypi.org/project/pywin32/'\fI\%pywin32\fP\X'tty: link' Python
|
|
extension installed.
|
|
.SS MarkdownCheck
|
|
.sp
|
|
Parse Markdown files for URLs to check.
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBfilename_re=\fP\fIREGEX\fP
|
|
Regular expression matching the names of Markdown files.
|
|
.UNINDENT
|
|
.SH WARNINGS
|
|
.sp
|
|
The following warnings are recognized by \fBignorewarnings\fP and
|
|
\fBignorewarningsforurls\fP:
|
|
.INDENT 0.0
|
|
.TP
|
|
\fBfile\-anchorcheck\-directory\fP
|
|
A local directory with an anchor, not supported by AnchorCheck.
|
|
.TP
|
|
\fBfile\-missing\-slash\fP
|
|
The file: URL is missing a trailing slash.
|
|
.TP
|
|
\fBfile\-system\-path\fP
|
|
The file: path is not the same as the system specific path.
|
|
.TP
|
|
\fBftp\-missing\-slash\fP
|
|
The ftp: URL is missing a trailing slash.
|
|
.TP
|
|
\fBhttp\-cookie\-store\-error\fP
|
|
An error occurred while storing a cookie.
|
|
.TP
|
|
\fBhttp\-empty\-content\fP
|
|
The URL had no content.
|
|
.TP
|
|
\fBhttp\-rate\-limited\fP
|
|
Too many HTTP requests.
|
|
.TP
|
|
\fBhttp\-redirected\fP
|
|
Redirected to a different URL.
|
|
.TP
|
|
\fBmail\-no\-mx\-host\fP
|
|
The mail MX host could not be found.
|
|
.TP
|
|
\fBurl\-content\-size\-zero\fP
|
|
The URL content size is zero.
|
|
.TP
|
|
\fBurl\-content\-too\-large\fP
|
|
The URL content size is too large.
|
|
.TP
|
|
\fBurl\-content\-type\-unparseable\fP
|
|
The URL content type is not parseable.
|
|
.TP
|
|
\fBurl\-effective\-url\fP
|
|
The effective URL is different from the original.
|
|
.TP
|
|
\fBurl\-error\-getting\-content\fP
|
|
Could not get the content of the URL.
|
|
.TP
|
|
\fBurl\-obfuscated\-ip\fP
|
|
The IP is obfuscated.
|
|
.TP
|
|
\fBurl\-whitespace\fP
|
|
The URL contains leading or trailing whitespace.
|
|
.UNINDENT
|
|
.SH SEE ALSO
|
|
.sp
|
|
\fB\fI\%linkchecker(1)\fP\fP
|
|
.SH AUTHOR
|
|
Bastian Kleineidam <bastian.kleineidam@web.de>
|
|
.SH COPYRIGHT
|
|
2000-2016 Bastian Kleineidam, 2010-2024 LinkChecker Authors
|
|
.\" Generated by docutils manpage writer.
|
|
.
|