From 441cda5e150e759e588e5a562d7a47163832f86f Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Fri, 24 Apr 2020 19:46:30 +0100 Subject: [PATCH 1/6] Switch to mandoc for generating html man pages Removes the need for diff files and is a currently maintained project. Cross references are only supported for mdoc macros but because we only have two pages this can be achieved with sed. A clean target is added to the Makefile to make development easier. --- doc/Makefile | 22 +- doc/linkchecker.1.html.diff | 80 -- doc/linkcheckerrc.5.html.diff | 11 - doc/web/media/man1/linkchecker.1.html | 1063 +++++++++---------- doc/web/media/man5/linkcheckerrc.5.html | 1259 +++++++++++------------ 5 files changed, 1115 insertions(+), 1320 deletions(-) delete mode 100644 doc/linkchecker.1.html.diff delete mode 100644 doc/linkcheckerrc.5.html.diff diff --git a/doc/Makefile b/doc/Makefile index 68682cd4..dab1f950 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -13,13 +13,17 @@ po4a: man: $(MANHTMLFILES) -$(HTMLDIR)/man1/linkchecker.1.html: en/linkchecker.1 linkchecker.1.html.diff - man2html -r $< | tail -n +2 | sed 's/Time:.*//g' | sed 's@/:@/@g' > $@ - patch --no-backup-if-mismatch --quiet $@ linkchecker.1.html.diff +$(HTMLDIR)/man1/linkchecker.1.html: en/linkchecker.1 + mandoc -Thtml $< > $@ + @sed -i -e \ + 's:linkcheckerrc(5):linkcheckerrc(5):g' \ + $(HTMLDIR)/man1/linkchecker.1.html -$(HTMLDIR)/man5/linkcheckerrc.5.html: en/linkcheckerrc.5 linkcheckerrc.5.html.diff - man2html -r $< | tail -n +2 | sed 's/Time:.*//g' | sed 's@/:@/@g' > $@ - patch --no-backup-if-mismatch --quiet $@ linkcheckerrc.5.html.diff +$(HTMLDIR)/man5/linkcheckerrc.5.html: en/linkcheckerrc.5 + mandoc -Thtml $< > $@ + @sed -i -e \ + 's:linkchecker(1):linkchecker(1):g' \ + $(HTMLDIR)/man5/linkcheckerrc.5.html # check all makefiles for formatting warnings check: @@ -32,4 +36,8 @@ check: done; \ done -.PHONY: po4a man check +clean: + rm $(MANHTMLFILES) + +.PHONY: po4a man check clean + diff --git 
a/doc/linkchecker.1.html.diff b/doc/linkchecker.1.html.diff deleted file mode 100644 index 73215803..00000000 --- a/doc/linkchecker.1.html.diff +++ /dev/null @@ -1,80 +0,0 @@ ---- linkchecker.1.html.orig 2011-06-14 21:14:55.016011206 +0200 -+++ linkchecker.1.html 2011-06-14 21:17:07.108913849 +0200 -@@ -38,7 +38,7 @@ - - The most common use checks the given domain recursively, plus any - URL pointing outside of the domain: --
  linkchecker http://www.example.net/ -+
  linkchecker http://www.example.net/ -
- - Beware that this checks the whole site which can have thousands of URLs. -@@ -59,15 +59,15 @@ -
- - You can skip the http:// url part if the domain starts with www.: --
  linkchecker www.example.com -+
  linkchecker www.example.com -
- - You can skip the ftp:// url part if the domain starts with ftp.: --
  linkchecker -r0 ftp.example.org -+
  linkchecker -r0 ftp.example.org -
- - Generate a sitemap graph and convert it with the graphviz dot utility: --
  linkchecker -odot -v www.example.com | dot -Tps > sitemap.ps -+
  linkchecker -odot -v www.example.com | dot -Tps > sitemap.ps -   -

OPTIONS

- -@@ -302,8 +302,8 @@ - - Multiple entries are separated by a blank line. - The example below will send two cookies to all URLs starting with --http://example.com/hello/ and one to all URLs starting --with https://example.org/: -+http://example.com/hello/ and one to all URLs starting -+with https://example.org/: -

-
 Host: example.com -
 Path: /hello -@@ -326,15 +326,15 @@ - variables to ignore any proxy settings for these domains. - Setting a HTTP proxy on Unix for example looks like this: -

--
  export http_proxy="http://proxy.example.com:8080" -+
  export http_proxy="http://proxy.example.com:8080" -

- Proxy authentication is also supported: -

--
  export http_proxy="http://user1:mypass@proxy.example.org:8081" -+
  export http_proxy="http://user1:mypass@proxy.example.org:8081" -

- Setting a proxy on the Windows command prompt: -

--
  set http_proxy=http://proxy.example.com:8080 -+
  set http_proxy=http://proxy.example.com:8080 -

-   -

PERFORMED CHECKS

-@@ -470,8 +470,8 @@ -

NOTES

- - URLs on the commandline starting with ftp. are treated like --ftp://ftp., URLs starting with www. are treated like --http://www.. -+ftp://ftp., URLs starting with www. are treated like -+http://www.. - You can also give local files as arguments. -

- If you have your system configured to automatically establish a -@@ -584,7 +584,7 @@ - -


- This document was created by --man2html, -+man2html, - using the manual pages.
- - diff --git a/doc/linkcheckerrc.5.html.diff b/doc/linkcheckerrc.5.html.diff deleted file mode 100644 index fb319da1..00000000 --- a/doc/linkcheckerrc.5.html.diff +++ /dev/null @@ -1,11 +0,0 @@ ---- linkcheckerrc.5.html.orig 2011-06-15 06:38:09.830998286 +0200 -+++ linkcheckerrc.5.html 2011-06-15 06:38:18.327310373 +0200 -@@ -487,7 +487,7 @@ - -
- This document was created by --man2html, -+man2html, - using the manual pages.
- - diff --git a/doc/web/media/man1/linkchecker.1.html b/doc/web/media/man1/linkchecker.1.html index 1d0adf7f..9344feab 100644 --- a/doc/web/media/man1/linkchecker.1.html +++ b/doc/web/media/man1/linkchecker.1.html @@ -1,564 +1,511 @@ - - -Man page of LINKCHECKER - -

LINKCHECKER

-Section: LinkChecker commandline usage (1)
Updated: 2010-07-01
Index -Return to Main Contents
- -  -

NAME

- -linkchecker - command line client to check HTML documents and websites for broken links -  -

SYNOPSIS

- -linkchecker [options] [file-or-url]... -  -

DESCRIPTION

- -

- + + + + + + LINKCHECKER(1) + + + + + + + + +
LINKCHECKER(1)LinkChecker commandline usageLINKCHECKER(1)
+

+
+

+linkchecker - command line client to check HTML documents and websites for + broken links +
+
+

+linkchecker [options] [file-or-url]... +
+
+

LinkChecker features -
-
-recursive and multithreaded checking, -
-output in colored or normal text, HTML, SQL, CSV, XML or a sitemap graph in different formats, -
-support for HTTP/1.1, HTTPS, FTP, mailto:, news:, nntp:, Telnet and local file links, -
-restriction of link checking with URL filters, -
-proxy support, -
-username/password authorization for HTTP, FTP and Telnet, -
-support for robots.txt exclusion protocol, -
-support for Cookies -
-support for HTML5 -
-HTML and CSS syntax check -
-Antivirus check -
-a command line and web interface -
-  -

EXAMPLES

- +
    +
  • recursive and multithreaded checking,
  • +
  • output in colored or normal text, HTML, SQL, CSV, XML or a sitemap graph + in different formats,
  • +
  • support for HTTP/1.1, HTTPS, FTP, mailto:, news:, nntp:, Telnet and local + file links,
  • +
  • restriction of link checking with URL filters,
  • +
  • proxy support,
  • +
  • username/password authorization for HTTP, FTP and Telnet,
  • +
  • support for robots.txt exclusion protocol,
  • +
  • support for Cookies
  • +
  • support for HTML5
  • +
  • HTML and CSS syntax check
  • +
  • Antivirus check
  • +
  • a command line and web interface
  • +
+
+
+

The most common use checks the given domain recursively: -
  linkchecker http://www.example.com/ -
- -Beware that this checks the whole site which can have thousands of URLs. -Use the -r option to restrict the recursion depth. -
- -Don't check URLs with /secret in its name. All other links are checked as usual: -
  linkchecker --ignore-url=/secret mysite.example.com -
- + linkchecker http://www.example.com/ +
+Beware that this checks the whole site which can have thousands of URLs. Use the + -r option to restrict the recursion depth. +
+Don't check URLs with /secret in its name. All other links are checked as + usual: + linkchecker --ignore-url=/secret mysite.example.com +
Checking a local HTML file on Unix: -
  linkchecker ../bla.html -
- + linkchecker ../bla.html +
Checking a local HTML file on Windows: -
  linkchecker c:\temp\test.html -
- -You can skip the http:// url part if the domain starts with www.: -
  linkchecker www.example.com -
- -You can skip the ftp:// url part if the domain starts with ftp.: -
  linkchecker -r0 ftp.example.com -
- + linkchecker c:\temp\test.html +
+You can skip the http:// url part if the domain starts with www.: + linkchecker www.example.com +
+You can skip the ftp:// url part if the domain starts with ftp.: + linkchecker -r0 ftp.example.com +
Generate a sitemap graph and convert it with the graphviz dot utility: -
  linkchecker -odot -v www.example.com | dot -Tps > sitemap.ps -  -

OPTIONS

- -  -

General options

- -
-
-fFILENAME, --config=FILENAME
-Use FILENAME as configuration file. As default LinkChecker -uses ~/.linkchecker/linkcheckerrc. -
-h, --help
-Help me! Print usage information for this program. -
--stdin
-Read list of white-space separated URLs to check from stdin. -
-tNUMBER, --threads=NUMBER
-Generate no more than the given number of threads. Default number -of threads is 10. To disable threading specify a non-positive number. -
-V, --version
-Print version and exit. -
--list-plugins
-Print available check plugins and exit. -
-  -

Output options

- -
-
-DSTRING, --debug=STRING
-Print debugging output for the given logger. -Available loggers are cmdline, checking, -cache, dns and all. -Specifying all is an alias for specifying all available loggers. -The option can be given multiple times to debug with more -than one logger. - -For accurate results, threading will be disabled during debug runs. -
-FTYPE[/ENCODING][/FILENAME], --file-output=TYPE[/ENCODING][/FILENAME]
-Output to a file linkchecker-out.TYPE, -$HOME/.linkchecker/blacklist for -blacklist output, or FILENAME if specified. -The ENCODING specifies the output encoding, the default is -that of your locale. -Valid encodings are listed at -http://docs.python.org/library/codecs.html#standard-encodings. -
- -The FILENAME and ENCODING parts of the none output type -will be ignored, else if the file already exists, it will be overwritten. -You can specify this option more than once. Valid file output types -are text, html, sql, -csv, gml, dot, xml, sitemap, none or -blacklist. -Default is no file output. The various output types are documented -below. Note that you can suppress all console output -with the option -o none. -
--no-status
-Do not print check status messages. -
--no-warnings
-Don't log warnings. Default is to log warnings. -
-oTYPE[/ENCODING], --output=TYPE[/ENCODING]
-Specify output type as text, html, sql, -csv, gml, dot, xml, sitemap, none or -blacklist. -Default type is text. The various output types are documented -below. -
- -The ENCODING specifies the output encoding, the default is -that of your locale. Valid encodings are listed at -http://docs.python.org/library/codecs.html#standard-encodings. -
-q, --quiet
-Quiet operation, an alias for -o none. -This is only useful with -F. -
-v, --verbose
-Log all checked URLs. Default is to log only errors and warnings. -
-WREGEX, --warning-regex=REGEX
-Define a regular expression which prints a warning if it matches any -content of the checked link. -This applies only to valid pages, so we can get their content. -
- -Use this to check for pages that contain some form of error, for example -"This page has moved" or "Oracle Application error". -
- -Note that multiple values can be combined in the regular expression, -for example "(This page has moved|Oracle Application error)". -
- -See section REGULAR EXPRESSIONS for more info. -
-  -

Checking options

- -
-
--cookiefile=FILENAME
-Read a file with initial cookie data. The cookie data -format is explained below. -
--check-extern
-Check external URLs. -
--ignore-url=REGEX
-URLs matching the given regular expression will be ignored and not checked. -
- -This option can be given multiple times. -
- -See section REGULAR EXPRESSIONS for more info. -
-NSTRING, --nntp-server=STRING
-Specify an NNTP server for news: links. Default is the -environment variable NNTP_SERVER. If no host is given, -only the syntax of the link is checked. -
--no-follow-url=REGEX
-Check but do not recurse into URLs matching the given regular -expression. -
- -This option can be given multiple times. -
- -See section REGULAR EXPRESSIONS for more info. -
-p, --password
-Read a password from console and use it for HTTP and FTP authorization. -For FTP the default password is anonymous@. For HTTP there is -no default password. See also -u. -
-rNUMBER, --recursion-level=NUMBER
-Check recursively all links up to given depth. -A negative depth will enable infinite recursion. -Default depth is infinite. -
--timeout=NUMBER
-Set the timeout for connection attempts in seconds. The default timeout -is 60 seconds. -
-uSTRING, --user=STRING
-Try the given username for HTTP and FTP authorization. -For FTP the default username is anonymous. For HTTP there is -no default username. See also -p. -
--user-agent=STRING
-Specify the User-Agent string to send to the HTTP server, for example -"Mozilla/4.0". The default is "LinkChecker/X.Y" where X.Y is the current -version of LinkChecker. -

-

-  -

CONFIGURATION FILES

- -Configuration files can specify all options above. They can also -specify some options that cannot be set on the command line. -See linkcheckerrc(5) for more info. -

-  -

OUTPUT TYPES

- -Note that by default only errors and warnings are logged. -You should use the --verbose option to get the complete URL list, -especially when outputting a sitemap graph format. -

-

-
text
-Standard text logger, logging URLs in keyword: argument fashion. -
html
-Log URLs in keyword: argument fashion, formatted as HTML. -Additionally has links to the referenced pages. Invalid URLs have -HTML and CSS syntax check links appended. -
csv
-Log check result in CSV format with one URL per line. -
gml
-Log parent-child relations between linked URLs as a GML sitemap graph. -
dot
-Log parent-child relations between linked URLs as a DOT sitemap graph. -
gxml
-Log check result as a GraphXML sitemap graph. -
xml
-Log check result as machine-readable XML. -
sitemap
-Log check result as an XML sitemap whose protocol is documented at -http://www.sitemaps.org/protocol.html. -
sql
-Log check result as SQL script with INSERT commands. An example -script to create the initial SQL table is included as create.sql. -
blacklist
-Suitable for cron jobs. Logs the check result into a file -~/.linkchecker/blacklist which only contains entries with invalid -URLs and the number of times they have failed. -
none
-Logs nothing. Suitable for debugging or checking the exit code. -
-  -

REGULAR EXPRESSIONS

- -LinkChecker accepts Python regular expressions. -See http://docs.python.org/howto/regex.html for an introduction. -

-An addition is that a leading exclamation mark negates the regular -expression. -  -

COOKIE FILES

- -A cookie file contains standard HTTP header (RFC 2616) data with the -following possible names: -
-
Host (required)
-Sets the domain the cookies are valid for. -
Path (optional)
-Gives the path the cookies are value for; default path is /. -
Set-cookie (required)
-Set cookie name/value. Can be given more than once. -
-

- -Multiple entries are separated by a blank line. -The example below will send two cookies to all URLs starting with -http://example.com/hello/ and one to all URLs starting -with https://example.org/: -

-
 Host: example.com -
 Path: /hello -
 Set-cookie: ID="smee" -
 Set-cookie: spam="egg" -

-
 Host: example.org -
 Set-cookie: baggage="elitist"; comment="hologram" -

-  -

PROXY SUPPORT

- -To use a proxy on Unix or Windows set the $http_proxy, $https_proxy or $ftp_proxy -environment variables to the proxy URL. The URL should be of the form -http://[user:pass@]host[:port]. -LinkChecker also detects manual proxy settings of Internet Explorer under -Windows systems, and gconf or KDE on Linux systems. -On a Mac use the Internet Config to select a proxy. -You can also set a comma-separated domain list in the $no_proxy environment -variables to ignore any proxy settings for these domains. -Setting a HTTP proxy on Unix for example looks like this: -

-
  export http_proxy="http://proxy.example.com:8080" -

-Proxy authentication is also supported: -

-
  export http_proxy="http://user1:mypass@proxy.example.org:8081" -

-Setting a proxy on the Windows command prompt: -

-
  set http_proxy=http://proxy.example.com:8080 -

-  -

PERFORMED CHECKS

- -All URLs have to pass a preliminary syntax test. Minor quoting -mistakes will issue a warning, all other invalid syntax issues -are errors. -After the syntax check passes, the URL is queued for connection -checking. All connection check types are described below. -
-
HTTP links (http:, https:)
-After connecting to the given HTTP server the given path -or query is requested. All redirections are followed, and -if user/password is given it will be used as authorization -when necessary. -All final HTTP status codes other than 2xx are errors. -HTML page contents are checked for recursion. -
Local files (file:)
-A regular, readable file that can be opened is valid. A readable -directory is also valid. All other files, for example device files, -unreadable or non-existing files are errors. -HTML or other parseable file contents are checked for recursion. -
Mail links (mailto:)
-A mailto: link eventually resolves to a list of email addresses. -If one address fails, the whole list will fail. -For each mail address we check the following things: -
  1) Check the adress syntax, both of the part before and after -
     the @ sign. -
  2) Look up the MX DNS records. If we found no MX record, -
     print an error. -
  3) Check if one of the mail hosts accept an SMTP connection. -
     Check hosts with higher priority first. -
     If no host accepts SMTP, we print a warning. -
  4) Try to verify the address with the VRFY command. If we got -
     an answer, print the verified address as an info. -
FTP links (ftp:)
-
   -
  For FTP links we do: -
   -
  1) connect to the specified host -
  2) try to login with the given user and password. The default -
     user is ``anonymous``, the default password is ``anonymous@``. -
  3) try to change to the given directory -
  4) list the file with the NLST command -

-

Telnet links (``telnet:``)
-
   -
  We try to connect and if user/password are given, login to the -
  given telnet server. -

-

NNTP links (``news:``, ``snews:``, ``nntp``)
-
   -
  We try to connect to the given NNTP server. If a news group or -
  article is specified, try to request it from the server. -

-

Unsupported links (``javascript:``, etc.)
-
   -
  An unsupported link will only print a warning. No further checking -
  will be made. -
   -
  The complete list of recognized, but unsupported links can be found -
  in the linkcheck/checker/unknownurl.py source file. -
  The most prominent of them should be JavaScript links. -

-

-  -

PLUGINS

- -There are two plugin types: connection and content plugins. -Connection plugins are run after a successful connection to the -URL host. -Content plugins are run if the URL type has content -(mailto: URLs have no content for example) and if the check is not -forbidden (ie. by HTTP robots.txt). -See linkchecker --list-plugins for a list of plugins and -their documentation. All plugins are enabled via the linkcheckerrc(5) -configuration file. -

-  -

RECURSION

- -Before descending recursively into a URL, it has to fulfill several -conditions. They are checked in this order: -

-1. A URL must be valid. -

-2. A URL must be parseable. This currently includes HTML files, -
   Opera bookmarks files, and directories. If a file type cannot -
   be determined (for example it does not have a common HTML file -
   extension, and the content does not look like HTML), it is assumed -
   to be non-parseable. -

-3. The URL content must be retrievable. This is usually the case -
   except for example mailto: or unknown URL types. -

-4. The maximum recursion level must not be exceeded. It is configured -
   with the --recursion-level option and is unlimited per default. -

-5. It must not match the ignored URL list. This is controlled with -
   the --ignore-url option. -

-6. The Robots Exclusion Protocol must allow links in the URL to be -
   followed recursively. This is checked by searching for a -
   "nofollow" directive in the HTML header data. -

-Note that the directory recursion reads all files in that -directory, not just a subset like index.htm*. -

-  -

NOTES

- -URLs on the commandline starting with ftp. are treated like -ftp://ftp., URLs starting with www. are treated like -http://www.. -You can also give local files as arguments. -

-If you have your system configured to automatically establish a -connection to the internet (e.g. with diald), it will connect when -checking links not pointing to your local host. -Use the --ignore-url option to prevent this. -

-Javascript links are not supported. -

-If your platform does not support threading, LinkChecker disables it -automatically. -

-You can supply multiple user/password pairs in a configuration file. -

-When checking news: links the given NNTP host doesn't need to be the -same as the host of the user browsing your pages. -  -

ENVIRONMENT

- -NNTP_SERVER - specifies default NNTP server -
- -http_proxy - specifies default HTTP proxy server -
- -ftp_proxy - specifies default FTP proxy server -
- -no_proxy - comma-separated list of domains to not contact over a proxy server -
- -LC_MESSAGES, LANG, LANGUAGE - specify output language -  -

RETURN VALUE

- + linkchecker -odot -v www.example.com | dot -Tps > sitemap.ps +
+
+

+
+

+
+
-fFILENAME, --config=FILENAME
+
Use FILENAME as configuration file. As default LinkChecker uses + ~/.linkchecker/linkcheckerrc.
+
-h, --help
+
Help me! Print usage information for this program.
+
--stdin
+
Read list of white-space separated URLs to check from stdin.
+
-tNUMBER, --threads=NUMBER
+
Generate no more than the given number of threads. Default number of + threads is 10. To disable threading specify a non-positive number.
+
-V, --version
+
Print version and exit.
+
--list-plugins
+
Print available check plugins and exit.
+
+
+
+

+
+
-DSTRING, --debug=STRING
+
Print debugging output for the given logger. Available loggers are + cmdline, checking, cache, dns, plugins + and all. Specifying all is an alias for specifying all + available loggers. The option can be given multiple times to debug with + more than one logger. For accurate results, threading will be disabled + during debug runs.
+
-FTYPE[/ENCODING][/FILENAME], + --file-output=TYPE[/ENCODING][/FILENAME]
+
Output to a file linkchecker-out.TYPE, + $HOME/.linkchecker/blacklist for blacklist output, or + FILENAME if specified. The ENCODING specifies the output + encoding, the default is that of your locale. Valid encodings are listed + at http://docs.python.org/library/codecs.html#standard-encodings. +
+ The FILENAME and ENCODING parts of the none output type + will be ignored, else if the file already exists, it will be overwritten. + You can specify this option more than once. Valid file output types are + text, html, sql, csv, gml, dot, + xml, sitemap, none or blacklist. Default is no + file output. The various output types are documented below. Note that you + can suppress all console output with the option -o none.
+
--no-status
+
Do not print check status messages.
+
--no-warnings
+
Don't log warnings. Default is to log warnings.
+
-oTYPE[/ENCODING], + --output=TYPE[/ENCODING]
+
Specify output type as text, html, sql, csv, + gml, dot, xml, sitemap, none or + blacklist. Default type is text. The various output types + are documented below. +
+ The ENCODING specifies the output encoding, the default is that of + your locale. Valid encodings are listed at + http://docs.python.org/library/codecs.html#standard-encodings.
+
-q, --quiet
+
Quiet operation, an alias for -o none. This is only useful with + -F.
+
-v, --verbose
+
Log all checked URLs. Default is to log only errors and warnings.
+
-WREGEX, --warning-regex=REGEX
+
Define a regular expression which prints a warning if it matches any + content of the checked link. This applies only to valid pages, so we can + get their content. +
+ Use this to check for pages that contain some form of error, for example + "This page has moved" or "Oracle Application error". +
+ Note that multiple values can be combined in the regular expression, for + example "(This page has moved|Oracle Application error)". +
+ See section REGULAR EXPRESSIONS for more info.
+
+
+
+

+
+
--cookiefile=FILENAME
+
Read a file with initial cookie data. The cookie data format is explained + below.
+
--check-extern
+
Check external URLs.
+
--ignore-url=REGEX
+
URLs matching the given regular expression will be ignored and not + checked. +
+ This option can be given multiple times. +
+ See section REGULAR EXPRESSIONS for more info.
+
-NSTRING, --nntp-server=STRING
+
Specify an NNTP server for news: links. Default is the environment + variable NNTP_SERVER. If no host is given, only the syntax of the + link is checked.
+
--no-follow-url=REGEX
+
Check but do not recurse into URLs matching the given regular expression. +
+ This option can be given multiple times. +
+ See section REGULAR EXPRESSIONS for more info.
+
-p, --password
+
Read a password from console and use it for HTTP and FTP authorization. + For FTP the default password is anonymous@. For HTTP there is no + default password. See also -u.
+
-rNUMBER, --recursion-level=NUMBER
+
Check recursively all links up to given depth. A negative depth will + enable infinite recursion. Default depth is infinite.
+
--timeout=NUMBER
+
Set the timeout for connection attempts in seconds. The default timeout is + 60 seconds.
+
-uSTRING, --user=STRING
+
Try the given username for HTTP and FTP authorization. For FTP the default + username is anonymous. For HTTP there is no default username. See + also -p.
+
--user-agent=STRING
+
Specify the User-Agent string to send to the HTTP server, for example + "Mozilla/4.0". The default is "LinkChecker/X.Y" where + X.Y is the current version of LinkChecker. +

+
+
+
+
+
+

+Configuration files can specify all options above. They can also specify some + options that cannot be set on the command line. See linkcheckerrc(5) + for more info. +

+
+
+

+Note that by default only errors and warnings are logged. You should use the + --verbose option to get the complete URL list, especially when + outputting a sitemap graph format. +

+
+
text
+
Standard text logger, logging URLs in keyword: argument fashion.
+
html
+
Log URLs in keyword: argument fashion, formatted as HTML. Additionally has + links to the referenced pages. Invalid URLs have HTML and CSS syntax check + links appended.
+
csv
+
Log check result in CSV format with one URL per line.
+
gml
+
Log parent-child relations between linked URLs as a GML sitemap + graph.
+
dot
+
Log parent-child relations between linked URLs as a DOT sitemap + graph.
+
gxml
+
Log check result as a GraphXML sitemap graph.
+
xml
+
Log check result as machine-readable XML.
+
sitemap
+
Log check result as an XML sitemap whose protocol is documented at + http://www.sitemaps.org/protocol.html.
+
sql
+
Log check result as SQL script with INSERT commands. An example script to + create the initial SQL table is included as create.sql.
+
blacklist
+
Suitable for cron jobs. Logs the check result into a file + ~/.linkchecker/blacklist which only contains entries with invalid + URLs and the number of times they have failed.
+
none
+
Logs nothing. Suitable for debugging or checking the exit code.
+
+
+
+

+LinkChecker accepts Python regular expressions. See + http://docs.python.org/howto/regex.html for an introduction. +

An addition is that a leading exclamation mark negates the regular + expression.

+
+
+

+A cookie file contains standard HTTP header (RFC 2616) data with the following + possible names: +
+
Host (required)
+
Sets the domain the cookies are valid for.
+
Path (optional)
+
Gives the path the cookies are valid for; default path is /.
+
Set-cookie (required)
+
Set cookie name/value. Can be given more than once.
+
+

Multiple entries are separated by a blank line. The example below + will send two cookies to all URLs starting with + http://example.com/hello/ and one to all URLs starting with + https://example.org/:

+

+ Host: example.com + Path: /hello + Set-cookie: ID="smee" + Set-cookie: spam="egg"

+

+ Host: example.org + Set-cookie: baggage="elitist"; comment="hologram"

+

+
+
+

+To use a proxy on Unix or Windows set the $http_proxy, $https_proxy or + $ftp_proxy environment variables to the proxy URL. The URL should be of the + form + http://[user:pass@]host[:port]. + LinkChecker also detects manual proxy settings of Internet Explorer under + Windows systems, and gconf or KDE on Linux systems. On a Mac use the Internet + Config to select a proxy. You can also set a comma-separated domain list in + the $no_proxy environment variables to ignore any proxy settings for these + domains. Setting a HTTP proxy on Unix for example looks like this: +

+ export http_proxy="http://proxy.example.com:8080"

+

Proxy authentication is also supported:

+

+ export http_proxy="http://user1:mypass@proxy.example.org:8081"

+

Setting a proxy on the Windows command prompt:

+

+ set http_proxy=http://proxy.example.com:8080

+

+
+
+

+All URLs have to pass a preliminary syntax test. Minor quoting mistakes will + issue a warning, all other invalid syntax issues are errors. After the syntax + check passes, the URL is queued for connection checking. All connection check + types are described below. +
+
HTTP links (http:, https:)
+
After connecting to the given HTTP server the given path or query is + requested. All redirections are followed, and if user/password is given it + will be used as authorization when necessary. All final HTTP status codes + other than 2xx are errors. HTML page contents are checked for + recursion.
+
Local files (file:)
+
A regular, readable file that can be opened is valid. A readable directory + is also valid. All other files, for example device files, unreadable or + non-existing files are errors. HTML or other parseable file contents are + checked for recursion.
+
Mail links (mailto:)
+
A mailto: link eventually resolves to a list of email addresses. If one + address fails, the whole list will fail. For each mail address we check + the following things: + 1) Check the address syntax, both of the part before and after + the @ sign. + 2) Look up the MX DNS records. If we found no MX record, + print an error. + 3) Check if one of the mail hosts accepts an SMTP connection. + Check hosts with higher priority first. + If no host accepts SMTP, we print a warning. + 4) Try to verify the address with the VRFY command. If we got + an answer, print the verified address as an info.
+
FTP links (ftp:)
+
+

+ For FTP links we do:

+

+ 1) connect to the specified host + 2) try to login with the given user and password. The default + user is ``anonymous``, the default password is ``anonymous@``. + 3) try to change to the given directory + 4) list the file with the NLST command

+

+
+
Telnet links (``telnet:``)
+
+

+ We try to connect and if user/password are given, login to the + given telnet server.

+

+
+
NNTP links (``news:``, ``snews:``, ``nntp``)
+
+

+ We try to connect to the given NNTP server. If a news group or + article is specified, try to request it from the server.

+

+
+
Unsupported links (``javascript:``, etc.)
+
+

+ An unsupported link will only print a warning. No further checking + will be made.

+

+ The complete list of recognized, but unsupported links can be found + in the linkcheck/checker/unknownurl.py source file. + The most prominent of them should be JavaScript links.

+

+
+
+
+
+

+There are two plugin types: connection and content plugins. Connection plugins + are run after a successful connection to the URL host. Content plugins are run + if the URL type has content (mailto: URLs have no content for example) and if + the check is not forbidden (i.e. by HTTP robots.txt). See linkchecker + --list-plugins for a list of plugins and their documentation. All plugins + are enabled via the linkcheckerrc(5) configuration file. +

+
+
+

+Before descending recursively into a URL, it has to fulfill several conditions. + They are checked in this order: +

1. A URL must be valid.

+

2. A URL must be parseable. This currently includes HTML files, + Opera bookmarks files, and directories. If a file type cannot + be determined (for example it does not have a common HTML file + extension, and the content does not look like HTML), it is assumed + to be non-parseable.

+

3. The URL content must be retrievable. This is usually the case + except for example mailto: or unknown URL types.

+

4. The maximum recursion level must not be exceeded. It is + configured + with the --recursion-level option and is unlimited by default.

+

5. It must not match the ignored URL list. This is controlled with + the --ignore-url option.

+

6. The Robots Exclusion Protocol must allow links in the URL to be + followed recursively. This is checked by searching for a + "nofollow" directive in the HTML header data.

+

Note that the directory recursion reads all files in that + directory, not just a subset like index.htm*.

+

+
+
+

+URLs on the commandline starting with ftp. are treated like + ftp://ftp., URLs starting with www. are treated like + http://www.. You can also give local files as arguments. +

If you have your system configured to automatically establish a + connection to the internet (e.g. with diald), it will connect when checking + links not pointing to your local host. Use the --ignore-url option to + prevent this.

+

Javascript links are not supported.

+

If your platform does not support threading, LinkChecker disables + it automatically.

+

You can supply multiple user/password pairs in a configuration + file.

+

When checking news: links the given NNTP host doesn't need + to be the same as the host of the user browsing your pages.

+
+
+

+NNTP_SERVER - specifies default NNTP server +
+http_proxy - specifies default HTTP proxy server +
+ftp_proxy - specifies default FTP proxy server +
+no_proxy - comma-separated list of domains to not contact over a proxy + server +
+LC_MESSAGES, LANG, LANGUAGE - specify output language +
+
+

The return value is 2 when -
-
-a program error occurred. -
-

- -The return value is 1 when -

-
-invalid links were found or -
-link warnings were found and warnings are enabled -
-

- -Else the return value is zero. -  -

LIMITATIONS

- -LinkChecker consumes memory for each queued URL to check. With thousands -of queued URLs the amount of consumed memory can become quite large. This -might slow down the program or even the whole system. -  -

FILES

- -~/.linkchecker/linkcheckerrc - default configuration file -
- -~/.linkchecker/blacklist - default blacklist logger output filename -
- -linkchecker-out.TYPE - default logger file output name -
- -http://docs.python.org/library/codecs.html#standard-encodings - valid output encodings -
- -http://docs.python.org/howto/regex.html - regular expression documentation -

-  -

SEE ALSO

- -linkcheckerrc(5) -  -

AUTHOR

- -Bastian Kleineidam <bastian.kleineidam@web.de> -  -

COPYRIGHT

- -Copyright © 2000-2014 Bastian Kleineidam -

- -


- 

Index

-
-
NAME
-
SYNOPSIS
-
DESCRIPTION
-
EXAMPLES
-
OPTIONS
-
-
General options
-
Output options
-
Checking options
-
-
CONFIGURATION FILES
-
OUTPUT TYPES
-
REGULAR EXPRESSIONS
-
COOKIE FILES
-
PROXY SUPPORT
-
PERFORMED CHECKS
-
PLUGINS
-
RECURSION
-
NOTES
-
ENVIRONMENT
-
RETURN VALUE
-
LIMITATIONS
-
FILES
-
SEE ALSO
-
AUTHOR
-
COPYRIGHT
-
-
-This document was created by -man2html, -using the manual pages.
- - - +
+
+
a program error occurred.
+
+

The return value is 1 when

+
    +
  • invalid links were found or
  • +
  • link warnings were found and warnings are enabled
  • +
+

Else the return value is zero.

+
+
+

+LinkChecker consumes memory for each queued URL to check. With thousands of + queued URLs the amount of consumed memory can become quite large. This might + slow down the program or even the whole system. +
+
+

+~/.linkchecker/linkcheckerrc - default configuration file +
+~/.linkchecker/blacklist - default blacklist logger output filename +
+linkchecker-out.TYPE - default logger file output name +
+http://docs.python.org/library/codecs.html#standard-encodings - valid + output encodings +
+http://docs.python.org/howto/regex.html - regular expression + documentation +

+
+
+

+linkcheckerrc(5) +
+
+

+Bastian Kleineidam <bastian.kleineidam@web.de> +
+
+

+Copyright © 2000-2014 Bastian Kleineidam +
+
+ + + + + +
2010-07-01LinkChecker
+ + diff --git a/doc/web/media/man5/linkcheckerrc.5.html b/doc/web/media/man5/linkcheckerrc.5.html index db9f3b2c..1841f978 100644 --- a/doc/web/media/man5/linkcheckerrc.5.html +++ b/doc/web/media/man5/linkcheckerrc.5.html @@ -1,665 +1,596 @@ - - -Man page of linkcheckerrc - -

linkcheckerrc

-Section: File Formats (5)
Updated: 2007-11-30
Index -Return to Main Contents
- -  -

NAME

- + + + + + + linkcheckerrc(5) + + + + + + + + +
linkcheckerrc(5)File Formats Manuallinkcheckerrc(5)
+
+
+

linkcheckerrc - configuration file for LinkChecker -  -

DESCRIPTION

- -linkcheckerrc is the configuration file for LinkChecker. -The file is written in an INI-style format. -
- -The default file location is ~/.linkchecker/linkcheckerrc on Unix, -%HOMEPATH%\.linkchecker\linkcheckerrc on Windows systems. -  -

SETTINGS

- -

-  -

[checking]

- -
-
anchors=[0|1]
-Check HTTP anchor references. Default is not to check anchors. -This option enables logging of the warning url-anchor-not-found. -
- -Command line option: --anchors -
checkcss=[0|1]
-Check syntax of CSS URLs with the W3C online validator. -
- -Command line option: --check-css -
checkhtml=[0|1]
-Check syntax of HTML URLs with the W3C online validator. -
- -Command line option: --check-html -
clamavconf=filename
-Filename of clamd.conf config file. -
- -Command line option: none -
cookiefile=filename
-Read a file with initial cookie data. The cookie data -format is explained in linkchecker(1). -
- -Command line option: --cookiefile -
cookies=[0|1]
-Accept and send HTTP cookies. -
- -Command line option: --cookies -
debugmemory=[0|1]
-When checking finishes, write a memory dump to a temporary file. -The memory dump is written both when checking finishes normally -and when checking gets canceled. -
- -The memory dump only works if the python-meliae package is installed. -Otherwise a warning is printed to install it. -
- -Command line option: none -
localwebroot=STRING
-When checking absolute URLs inside local files, the given root directory -is used as base URL. -
- -Note that the given directory must have URL syntax, so it must use a slash -to join directories instead of a backslash. -And the given directory must end with a slash. -
- -Command line option: none -
nntpserver=STRING
-Specify an NNTP server for news: links. Default is the -environment variable NNTP_SERVER. If no host is given, -only the syntax of the link is checked. -
- -Command line option: --nntp-server -
pause=NUMBER
-Pause the given number of seconds between two subsequent connection -requests to the same host. -
- -Command line option: --pause -
recursionlevel=NUMBER
-Check recursively all links up to given depth. -A negative depth will enable infinite recursion. -Default depth is infinite. -
- -Command line option: --recursion-level -
scanvirus=[0|1]
-Scan content of URLs for viruses with ClamAV. -
- -Command line option: --scan-virus -
threads=NUMBER
-Generate no more than the given number of threads. Default number -of threads is 10. To disable threading specify a non-positive number. -
- -Command line option: --threads -
timeout=NUMBER
-Set the timeout for connection attempts in seconds. The default timeout -is 60 seconds. -
- -Command line option: --timeout -
useragent=STRING
-Specify the User-Agent string to send to the HTTP server, for example -"Mozilla/4.0". The default is "LinkChecker/X.Y" where X.Y is the current -version of LinkChecker. -
- -Command line option: --user-agent -
warningregex==REGEX
-Define a regular expression which prints a warning if it matches any -content of the checked link. -This applies only to valid pages, so we can get their content. -
- -Use this to check for pages that contain some form of error, for example -"This page has moved" or "Oracle Application Server error". -
- -Command line option: --warning-regex -
warnsizebytes=NUMBER
-Print a warning if content size info is available and exceeds the given -number of bytes. -
- -Command line option: --warning-size-bytes -
warnsslcertdaysvalid=NUMBER
-Check that SSL certificates are at least the given number of days valid. -The number must not be negative. -If the number of days is zero a warning is printed only for certificates -that are already expired. -
- -The default number of days is 14. -
- -Command line option: none -
maxrunseconds=NUMBER
-Stop checking new URLs after the given number of seconds. Same as if the -user stops (by hitting Ctrl-C) -after the given number of seconds. -
- -The default is not to stop until all URLs are checked. -
- -Command line option: none -
maxnumurls=NUMBER
-Maximum number of URLs to check. New URLs will not be queued after the -given number of URLs is checked. -
- -The default is to queue and check all URLs. -
- -Command line option: none -
maxconnectionshttp=NUMBER
-Maximum number of connections to HTTP servers. -
- -The default is 10. -
- -Command line option: none -
maxconnectionshttps=NUMBER
-Maximum number of connections to HTTPS servers. -
- -The default is 10. -
- -Command line option: none -
maxconnectionsftp=NUMBER
-Maximum number of connections to FTP servers. -
- -The default is 2. -
- -Command line option: none -
-  -

[filtering]

- -
-
ignore=REGEX (MULTILINE)
-Only check syntax of URLs matching the given regular expressions. -
- -Command line option: --ignore-url -
ignorewarnings=NAME[,NAME...]
-Ignore the comma-separated list of warnings. See -WARNIGS for the list of supported warnings. -
- -Command line option: none -
internlinks=REGEX
-Regular expression to add more URLs recognized as internal links. -Default is that URLs given on the command line are internal. -
- -Command line option: none -
nofollow=REGEX (MULTILINE)
-Check but do not recurse into URLs matching the given regular -expressions. -
- -Command line option: --no-follow-url -
-  -

[authentication]

- -
-
entry=REGEX USER [PASS] (MULTILINE)
-Provide different user/password pairs for different link types. -Entries are a triple (URL regex, username, password) -or a tuple (URL regex, username), where the entries are -separated by whitespace. -
- -The password is optional and if missing it has to be entered at the -commandline. -
- -If the regular expression matches the checked URL, the given user/password -pair is used for authentication. The commandline options --u and -p match every link and therefore override the entries -given here. The first match wins. At the moment, authentication is -used/needed for http[s] and ftp links. -
- -Command line option: -u, -p -
loginurl=URL
-A login URL to be visited before checking. Also needs authentication -data set for it, and implies using cookies because most logins use -cookies nowadays. -
loginuserfield=STRING
-The name of the user CGI field. Default name is login. -
loginpasswordfield=STRING
-The name of the password CGI field. Default name is password. -
loginextrafields=NAME:VALUE (MULTILINE)
-Optionally any additional CGI name/value pairs. Note that the default -values are submitted automatically. -
-  -

[output]

- -
-
complete=[0|1]
-If set log all checked URLs, even duplicates. Default is to log -duplicate URLs only once. -
- -Command line option: --complete -
debug=STRING[,STRING...]
-Print debugging output for the given loggers. -Available loggers are cmdline, checking, -cache, dns, thread and all. -Specifying all is an alias for specifying all available loggers. -
- -Command line option: --debug -
fileoutput=TYPE[,TYPE...]
-Output to a files linkchecker-out.TYPE, -$HOME/.linkchecker/blacklist for -blacklist output. -
- -Valid file output types are text, html, sql, -csv, gml, dot, xml, none or blacklist -Default is no file output. The various output types are documented -below. Note that you can suppress all console output -with output=none. -
- -Command line option: --file-output -
log=TYPE[/ENCODING]
-Specify output type as text, html, sql, -csv, gml, dot, xml, none or blacklist. -Default type is text. The various output types are documented -below. -
- -The ENCODING specifies the output encoding, the default is -that of your locale. Valid encodings are listed at -http://docs.python.org/library/codecs.html#standard-encodings. -
- -Command line option: --output -
quiet=[0|1]
-If set, operate quiet. An alias for log=none. -This is only useful with fileoutput. -
- -Command line option: --verbose -
status=[0|1]
-Control printing check status messages. Default is 1. -
- -Command line option: --no-status -
verbose=[0|1]
-If set log all checked URLs once. Default is to log only errors and warnings. -
- -Command line option: --verbose -
warnings=[0|1]
-If set log warnings. Default is to log warnings. -
- -Command line option: --no-warnings -
-  -

[text]

- -
-
filename=STRING
-Specify output filename for text logging. Default filename is -linkchecker-out.txt. -
- -Command line option: --file-output= -
parts=STRING
-Comma-separated list of parts that have to be logged. -See LOGGER PARTS below. -
- -Command line option: none -
encoding=STRING
-Valid encodings are listed in -http://docs.python.org/library/codecs.html#standard-encodings. -
- -Default encoding is iso-8859-15. -
color*
-Color settings for the various log parts, syntax is color or -type;color. The type can be -bold, light, blink, invert. -The color can be -default, black, red, green, yellow, blue, -purple, cyan, white, Black, Red, Green, -Yellow, Blue, Purple, Cyan or White. -
- -Command line option: none -
colorparent=STRING
-Set parent color. Default is white. -
colorurl=STRING
-Set URL color. Default is default. -
colorname=STRING
-Set name color. Default is default. -
colorreal=STRING
-Set real URL color. Default is cyan. -
colorbase=STRING
-Set base URL color. Default is purple. -
colorvalid=STRING
-Set valid color. Default is bold;green. -
colorinvalid=STRING
-Set invalid color. Default is bold;red. -
colorinfo=STRING
-Set info color. Default is default. -
colorwarning=STRING
-Set warning color. Default is bold;yellow. -
colordltime=STRING
-Set download time color. Default is default. -
colorreset=STRING
-Set reset color. Default is deault. -
-  -

[gml]

- -
-
filename=STRING
-See [text] section above. -
parts=STRING
-See [text] section above. -
encoding=STRING
-See [text] section above. -
-  -

[dot]

- -
-
filename=STRING
-See [text] section above. -
parts=STRING
-See [text] section above. -
encoding=STRING
-See [text] section above. -
-  -

[csv]

- -
-
filename=STRING
-See [text] section above. -
parts=STRING
-See [text] section above. -
encoding=STRING
-See [text] section above. -
separator=CHAR
-Set CSV separator. Default is a comma (,). -
quotechar=CHAR
-Set CSV quote character. Default is a double quote ("). -
-  -

[sql]

- -
-
filename=STRING
-See [text] section above. -
parts=STRING
-See [text] section above. -
encoding=STRING
-See [text] section above. -
dbname=STRING
-Set database name to store into. Default is linksdb. -
separator=CHAR
-Set SQL command separator character. Default is a semicolor (;). -
-  -

[html]

- -
-
filename=STRING
-See [text] section above. -
parts=STRING
-See [text] section above. -
encoding=STRING
-See [text] section above. -
colorbackground=COLOR
-Set HTML background color. Default is #fff7e5. -
colorurl=
-Set HTML URL color. Default is #dcd5cf. -
colorborder=
-Set HTML border color. Default is #000000. -
colorlink=
-Set HTML link color. Default is #191c83. -
colorwarning=
-Set HTML warning color. Default is #e0954e. -
colorerror=
-Set HTML error color. Default is #db4930. -
colorok=
-Set HTML valid color. Default is #3ba557. -
-  -

[blacklist]

- -
-
filename=STRING
-See [text] section above. -
encoding=STRING
-See [text] section above. -
-  -

[xml]

- -
-
filename=STRING
-See [text] section above. -
parts=STRING
-See [text] section above. -
encoding=STRING
-See [text] section above. -
-  -

[gxml]

- -
-
filename=STRING
-See [text] section above. -
parts=STRING
-See [text] section above. -
encoding=STRING
-See [text] section above. -
-  -

[sitemap]

- -
-
filename=STRING
-See [text] section above. -
parts=STRING
-See [text] section above. -
encoding=STRING
-See [text] section above. -
priority=FLOAT
-A number between 0.0 and 1.0 determining the priority. The default -priority for the first URL is 1.0, for all child URLs 0.5. -
frequency=[always|hourly|daily|weekly|monthly|yearly|never]
-The frequence pages are changing with. -
-  -

LOGGER PARTS

- -
 all       (for all parts) -
 id        (a unique ID for each logentry) -
 realurl   (the full url link) -
 result    (valid or invalid, with messages) -
 extern    (1 or 0, only in some logger types reported) -
 base      (base href=...) -
 name      (<a href=...>name</a> and <img alt="name">) -
 parenturl (if any) -
 info      (some additional info, e.g. FTP welcome messages) -
 warning   (warnings) -
 dltime    (download time) -
 checktime (check time) -
 url       (the original url name, can be relative) -
 intro     (the blurb at the beginning, "starting at ...") -
 outro     (the blurb at the end, "found x errors ...") -  -

MULTILINE

- -Some option values can span multiple lines. Each line has to be indented -for that to work. Lines starting with a hash (#) will be ignored, -though they must still be indented. -

-
 ignore= -
   lconline -
   bookmark -
   # a comment -
   ^mailto: -  -

EXAMPLE

- -
 [output] -
 log=html -

-
 [checking] -
 threads=5 -

-
 [filtering] -
 ignorewarnings=http-moved-permanent -  -

WARNINGS

- -The following warnings are recognized in the 'ignorewarnings' config -file entry: -
- -
-
file-missing-slash
-The file: URL is missing a trailing slash. -
file-system-path
-The file: path is not the same as the system specific path. -
ftp-missing-slash
-The ftp: URL is missing a trailing slash. -
http-auth-unknonwn
-Unsupported HTTP authentication method. -
http-cookie-store-error
-An error occurred while storing a cookie. -
http-decompress-error
-An error occurred while decompressing the URL content. -
http-empty-content
-The URL had no content. -
http-moved-permanent
-The URL has moved permanently. -
http-robots-denied
-The http: URL checking has been denied. -
http-unsupported-encoding
-The URL content is encoded with an unknown encoding. -
http-wrong-redirect
-The URL has been redirected to an URL of a different type. -
https-certificate-error
-The SSL certificate is invalid or expired. -
ignore-url
-The URL has been ignored. -
mail-no-connection
-No connection to a MX host could be established. -
mail-no-mx-host
-The mail MX host could not be found. -
mail-unverified-address
-The mailto: address could not be verified. -
nntp-no-newsgroup
-The NNTP newsgroup could not be found. -
nntp-no-server
-No NNTP server was found. -
url-anchor-not-found
-URL anchor was not found. -
url-content-size-unequal
-The URL content size and download size are unequal. -
url-content-size-zero
-The URL content size is zero. -
url-content-too-large
-The URL content size is too large. -
url-effective-url
-The effective URL is different from the original. -
url-error-getting-content
-Could not get the content of the URL. -
url-obfuscated-ip
-The IP is obfuscated. -
url-warnregex-found
-The warning regular expression was found in the URL contents. -
url-whitespace
-The URL contains leading or trailing whitespace. -

-

-  -

SEE ALSO

- -linkchecker(1) -  -

AUTHOR

- -Bastian Kleineidam <bastian.kleineidam@web.de> -  -

COPYRIGHT

- -Copyright © 2000-2014 Bastian Kleineidam -

- -


- 

Index

-
-
NAME
-
DESCRIPTION
-
SETTINGS
-
-
[checking]
-
[filtering]
-
[authentication]
-
[output]
-
[text]
-
[gml]
-
[dot]
-
[csv]
-
[sql]
-
[html]
-
[blacklist]
-
[xml]
-
[gxml]
-
[sitemap]
-
-
LOGGER PARTS
-
MULTILINE
-
EXAMPLE
-
WARNINGS
-
SEE ALSO
-
AUTHOR
-
COPYRIGHT
-
-
-This document was created by -man2html, -using the manual pages.
- - - +
+
+

+linkcheckerrc is the configuration file for LinkChecker. The file is + written in an INI-style format. +
+The default file location is ~/.linkchecker/linkcheckerrc on Unix, + %HOMEPATH%\.linkchecker\linkcheckerrc on Windows systems. +
+
+

+
+

+
+
cookiefile=filename
+
Read a file with initial cookie data. The cookie data format is explained + in linkchecker(1). +
+ Command line option: --cookiefile
+
localwebroot=STRING
+
When checking absolute URLs inside local files, the given root directory + is used as base URL. +
+ Note that the given directory must have URL syntax, so it must use a slash + to join directories instead of a backslash. And the given directory must + end with a slash. +
+ Command line option: none
+
nntpserver=STRING
+
Specify an NNTP server for news: links. Default is the environment + variable NNTP_SERVER. If no host is given, only the syntax of the + link is checked. +
+ Command line option: --nntp-server
+
recursionlevel=NUMBER
+
Check recursively all links up to given depth. A negative depth will + enable infinite recursion. Default depth is infinite. +
+ Command line option: --recursion-level
+
threads=NUMBER
+
Generate no more than the given number of threads. Default number of + threads is 10. To disable threading specify a non-positive number. +
+ Command line option: --threads
+
timeout=NUMBER
+
Set the timeout for connection attempts in seconds. The default timeout is + 60 seconds. +
+ Command line option: --timeout
+
aborttimeout=NUMBER
+
Time to wait for checks to finish after the user aborts the first time + (with Ctrl-C or the abort button). The default abort timeout is 300 + seconds. +
+ Command line option: none
+
useragent=STRING
+
Specify the User-Agent string to send to the HTTP server, for example + "Mozilla/4.0". The default is "LinkChecker/X.Y" where + X.Y is the current version of LinkChecker. +
+ Command line option: --user-agent
+
sslverify=[0|1|filename]
+
If set to zero disables SSL certificate checking. If set to one (the + default) enables SSL certificate checking with the provided CA certificate + file. If a filename is specified, it will be used as the certificate file. +
+ Command line option: none
+
maxrunseconds=NUMBER
+
Stop checking new URLs after the given number of seconds. Same as if the + user stops (by hitting Ctrl-C) after the given number of seconds. +
+ The default is not to stop until all URLs are checked. +
+ Command line option: none
+
maxnumurls=NUMBER
+
Maximum number of URLs to check. New URLs will not be queued after the + given number of URLs is checked. +
+ The default is to queue and check all URLs. +
+ Command line option: none
+
maxrequestspersecond=NUMBER
+
Limit the maximum number of requests per second to one host.
+
allowedschemes=NAME[,NAME...]
+
Allowed URL schemes as comma-separated list.
+
+
+
+

+
+
ignore=REGEX (MULTILINE)
+
Only check syntax of URLs matching the given regular expressions. +
+ Command line option: --ignore-url
+
ignorewarnings=NAME[,NAME...]
+
Ignore the comma-separated list of warnings. See WARNINGS for the + list of supported warnings. +
+ Command line option: none
+
internlinks=REGEX
+
Regular expression to add more URLs recognized as internal links. Default + is that URLs given on the command line are internal. +
+ Command line option: none
+
nofollow=REGEX (MULTILINE)
+
Check but do not recurse into URLs matching the given regular expressions. +
+ Command line option: --no-follow-url
+
checkextern=[0|1]
+
Check external links. Default is to check internal links only. +
+ Command line option: --check-extern
+
+
+
+

+
+
entry=REGEX USER [PASS] (MULTILINE)
+
Provide different user/password pairs for different link types. Entries + are a triple (URL regex, username, password) or a tuple (URL regex, + username), where the entries are separated by whitespace. +
+ The password is optional and if missing it has to be entered at the + commandline. +
+ If the regular expression matches the checked URL, the given user/password + pair is used for authentication. The commandline options -u and + -p match every link and therefore override the entries given here. + The first match wins. At the moment, authentication is used/needed for + http[s] and ftp links. +
+ Command line option: -u, -p
+
loginurl=URL
+
A login URL to be visited before checking. Also needs authentication data + set for it.
+
loginuserfield=STRING
+
The name of the user CGI field. Default name is login.
+
loginpasswordfield=STRING
+
The name of the password CGI field. Default name is password.
+
loginextrafields=NAME:VALUE (MULTILINE)
+
Optionally any additional CGI name/value pairs. Note that the default + values are submitted automatically.
+
+
+
+

+
+
debug=STRING[,STRING...]
+
Print debugging output for the given modules. Available debug modules are + cmdline, checking, cache, dns, thread, + plugins and all. Specifying all is an alias for + specifying all available loggers. +
+ Command line option: --debug
+
fileoutput=TYPE[,TYPE...]
+
Output to a file linkchecker-out.TYPE, or + $HOME/.linkchecker/blacklist for blacklist output. +
+ Valid file output types are text, html, sql, + csv, gml, dot, xml, none or + blacklist. Default is no file output. The various output types are + documented below. Note that you can suppress all console output with + log=none. +
+ Command line option: --file-output
+
log=TYPE[/ENCODING]
+
Specify output type as text, html, sql, csv, + gml, dot, xml, none or blacklist. + Default type is text. The various output types are documented + below. +
+ The ENCODING specifies the output encoding, the default is that of + your locale. Valid encodings are listed at + http://docs.python.org/library/codecs.html#standard-encodings. +
+ Command line option: --output
+
quiet=[0|1]
+
If set, operate quiet. An alias for log=none. This is only useful + with fileoutput. +
+ Command line option: --quiet
+
status=[0|1]
+
Control printing check status messages. Default is 1. +
+ Command line option: --no-status
+
verbose=[0|1]
+
If set log all checked URLs once. Default is to log only errors and + warnings. +
+ Command line option: --verbose
+
warnings=[0|1]
+
If set log warnings. Default is to log warnings. +
+ Command line option: --no-warnings
+
+
+
+

+
+
filename=STRING
+
Specify output filename for text logging. Default filename is + linkchecker-out.txt. +
+ Command line option: --file-output=
+
parts=STRING
+
Comma-separated list of parts that have to be logged. See LOGGER + PARTS below. +
+ Command line option: none
+
encoding=STRING
+
Valid encodings are listed in + http://docs.python.org/library/codecs.html#standard-encodings. +
+ Default encoding is iso-8859-15.
+
color*
+
Color settings for the various log parts, syntax is color or + type;color. The type can be bold, + light, blink, invert. The color can be + default, black, red, green, yellow, + blue, purple, cyan, white, Black, + Red, Green, Yellow, Blue, Purple, + Cyan or White. +
+ Command line option: none
+
colorparent=STRING
+
Set parent color. Default is white.
+
colorurl=STRING
+
Set URL color. Default is default.
+
colorname=STRING
+
Set name color. Default is default.
+
colorreal=STRING
+
Set real URL color. Default is cyan.
+
colorbase=STRING
+
Set base URL color. Default is purple.
+
colorvalid=STRING
+
Set valid color. Default is bold;green.
+
colorinvalid=STRING
+
Set invalid color. Default is bold;red.
+
colorinfo=STRING
+
Set info color. Default is default.
+
colorwarning=STRING
+
Set warning color. Default is bold;yellow.
+
colordltime=STRING
+
Set download time color. Default is default.
+
colorreset=STRING
+
Set reset color. Default is default.
+
+
+
+

+
+
filename=STRING
+
See [text] section above.
+
parts=STRING
+
See [text] section above.
+
encoding=STRING
+
See [text] section above.
+
+
+
+

+
+
filename=STRING
+
See [text] section above.
+
parts=STRING
+
See [text] section above.
+
encoding=STRING
+
See [text] section above.
+
+
+
+

+
+
filename=STRING
+
See [text] section above.
+
parts=STRING
+
See [text] section above.
+
encoding=STRING
+
See [text] section above.
+
separator=CHAR
+
Set CSV separator. Default is a comma (,).
+
quotechar=CHAR
+
Set CSV quote character. Default is a double quote (").
+
+
+
+

+
+
filename=STRING
+
See [text] section above.
+
parts=STRING
+
See [text] section above.
+
encoding=STRING
+
See [text] section above.
+
dbname=STRING
+
Set database name to store into. Default is linksdb.
+
separator=CHAR
+
Set SQL command separator character. Default is a semicolon + (;).
+
+
+
+

+
+
filename=STRING
+
See [text] section above.
+
parts=STRING
+
See [text] section above.
+
encoding=STRING
+
See [text] section above.
+
colorbackground=COLOR
+
Set HTML background color. Default is #fff7e5.
+
colorurl=
+
Set HTML URL color. Default is #dcd5cf.
+
colorborder=
+
Set HTML border color. Default is #000000.
+
colorlink=
+
Set HTML link color. Default is #191c83.
+
colorwarning=
+
Set HTML warning color. Default is #e0954e.
+
colorerror=
+
Set HTML error color. Default is #db4930.
+
colorok=
+
Set HTML valid color. Default is #3ba557.
+
+
+
+

+
+
filename=STRING
+
See [text] section above.
+
encoding=STRING
+
See [text] section above.
+
+
+
+

+
+
filename=STRING
+
See [text] section above.
+
parts=STRING
+
See [text] section above.
+
encoding=STRING
+
See [text] section above.
+
+
+
+

+
+
filename=STRING
+
See [text] section above.
+
parts=STRING
+
See [text] section above.
+
encoding=STRING
+
See [text] section above.
+
+
+
+

+
+
filename=STRING
+
See [text] section above.
+
parts=STRING
+
See [text] section above.
+
encoding=STRING
+
See [text] section above.
+
priority=FLOAT
+
A number between 0.0 and 1.0 determining the priority. The default + priority for the first URL is 1.0, for all child URLs 0.5.
+
frequency=[always|hourly|daily|weekly|monthly|yearly|never]
+
How frequently pages are changing.
+
+
+
+
+

+ all (for all parts) + id (a unique ID for each logentry) + realurl (the full url link) + result (valid or invalid, with messages) + extern (1 or 0, only in some logger types reported) + base (base href=...) + name (<a href=...>name</a> and <img + alt="name">) + parenturl (if any) + info (some additional info, e.g. FTP welcome messages) + warning (warnings) + dltime (download time) + checktime (check time) + url (the original url name, can be relative) + intro (the blurb at the beginning, "starting at ...") + outro (the blurb at the end, "found x errors ...") +
+
+

+Some option values can span multiple lines. Each line has to be indented for + that to work. Lines starting with a hash (#) will be ignored, though + they must still be indented. +

+ ignore= + lconline + bookmark + # a comment + ^mailto:

+
+
+

+ [output] + log=html +

+ [checking] + threads=5

+

+ [filtering] + ignorewarnings=http-moved-permanent

+

+
+
+

+All plugins have a separate section. If the section appears in the configuration + file the plugin is enabled. Some plugins read extra options in their section. +

+
+

+Checks validity of HTML anchors. +

+
+
+

+Adds the country and if possible city name of the URL host as info. Needs GeoIP + or pygeoip and a local country or city lookup DB installed. +

+
+
+

+Define a regular expression which prints a warning if it matches any content of + the checked link. This applies only to valid pages, so we can get their + content. +
+
warningregex=REGEX
+
Use this to check for pages that contain some form of error message, for + example "This page has moved" or "Oracle Application + error". REGEX should be unquoted. +

Note that multiple values can be combined in the regular + expression, for example "(This page has moved|Oracle Application + error)".

+

+
+
+
+
+

+Check SSL certificate expiration date. Only internal https: links will be + checked. A domain will only be checked once to avoid duplicate warnings. +
+
sslcertwarndays=NUMBER
+
Configures the expiration warning time in days. +

+
+
+
+
+

+Check the syntax of HTML pages with the online W3C HTML validator. See + http://validator.w3.org/docs/api.html. +

+
+
+

+Print HTTP headers in URL info. +
+
prefixes=prefix1[,prefix2]...
+
List of comma separated header prefixes. For example to display all HTTP + headers that start with "X-". +

+
+
+
+
+

+Check the syntax of HTML pages with the online W3C CSS validator. See + http://jigsaw.w3.org/css-validator/manual.html#expert. +

+
+
+

+Checks the page content for virus infections with clamav. A local clamav daemon + must be installed. +
+
clamavconf=filename
+
Filename of clamd.conf config file.
+
+
+
+

+Parse PDF files for URLs to check. Needs the pdfminer Python package + installed. +

+
+
+

+Parse Word files for URLs to check. Needs the pywin32 Python extension + installed. +

+
+
+
+

+The following warnings are recognized in the 'ignorewarnings' config file entry: +
+
+
file-missing-slash
+
The file: URL is missing a trailing slash.
+
file-system-path
+
The file: path is not the same as the system specific path.
+
ftp-missing-slash
+
The ftp: URL is missing a trailing slash.
+
http-cookie-store-error
+
An error occurred while storing a cookie.
+
http-empty-content
+
The URL had no content.
+
mail-no-mx-host
+
The mail MX host could not be found.
+
nntp-no-newsgroup
+
The NNTP newsgroup could not be found.
+
nntp-no-server
+
No NNTP server was found.
+
url-content-size-zero
+
The URL content size is zero.
+
url-content-too-large
+
The URL content size is too large.
+
url-effective-url
+
The effective URL is different from the original.
+
url-error-getting-content
+
Could not get the content of the URL.
+
url-obfuscated-ip
+
The IP is obfuscated.
+
url-whitespace
+
The URL contains leading or trailing whitespace. +

+
+
+
+
+

+linkchecker(1) +
+
+

+Bastian Kleineidam <bastian.kleineidam@web.de> +
+
+

+Copyright © 2000-2014 Bastian Kleineidam +
+
+ + + + + +
2007-11-30LinkChecker
+ + From a205a3722b58ac51acab5e18feaaf12617a13fc8 Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Fri, 24 Apr 2020 19:46:30 +0100 Subject: [PATCH 2/6] Update man pages to optimise for both html and man - Use "LinkChecker User Manual" as the source for both pages. - .UR/.UE for external links to allow mandoc to create links in html. - Use Linux man-pages format for cross references e.g. .BR linkcheckerrc (5) which are replace in the html by the Makefile. --- doc/en/linkchecker.1 | 187 +++++++++++---------- doc/en/linkcheckerrc.5 | 88 +++++----- doc/web/media/man1/linkchecker.1.html | 213 +++++++++++++----------- doc/web/media/man5/linkcheckerrc.5.html | 135 ++++++++++----- 4 files changed, 355 insertions(+), 268 deletions(-) diff --git a/doc/en/linkchecker.1 b/doc/en/linkchecker.1 index 0ba1ad8e..9b1c7798 100644 --- a/doc/en/linkchecker.1 +++ b/doc/en/linkchecker.1 @@ -1,12 +1,12 @@ -.TH LINKCHECKER 1 2010-07-01 "LinkChecker" "LinkChecker commandline usage" +.TH LINKCHECKER 1 2020-04-24 "LinkChecker" "LinkChecker User Manual" .SH NAME -linkchecker - command line client to check HTML documents and websites for broken links -. +linkchecker \- command line client to check HTML documents and websites for broken links .SH SYNOPSIS -\fBlinkchecker\fP [\fIoptions\fP] [\fIfile-or-url\fP]... -. +.B linkchecker +.RI [ options ] +.RI [ file-or-url ]... .SH DESCRIPTION -.LP +.TP 2 LinkChecker features .IP \(bu recursive and multithreaded checking, @@ -33,30 +33,30 @@ Antivirus check .IP \(bu a command line and web interface .SH EXAMPLES +.TP 2 The most common use checks the given domain recursively: - \fBlinkchecker http://www.example.com/\fP +.B linkchecker http://www.example.com/ .br Beware that this checks the whole site which can have thousands of URLs. Use the \fB\-r\fP option to restrict the recursion depth. -.br +.TP Don't check URLs with \fB/secret\fP in its name. 
All other links are checked as usual: - \fBlinkchecker \-\-ignore\-url=/secret mysite.example.com\fP -.br +.B linkchecker \-\-ignore\-url=/secret mysite.example.com +.TP Checking a local HTML file on Unix: - \fBlinkchecker ../bla.html\fP -.br +.B linkchecker ../bla.html +.TP Checking a local HTML file on Windows: - \fBlinkchecker c:\\temp\\test.html\fP -.br +.B linkchecker c:\etemp\etest.html +.TP You can skip the \fBhttp://\fP url part if the domain starts with \fBwww.\fP: - \fBlinkchecker www.example.com\fP -.br +.B linkchecker www.example.com +.TP You can skip the \fBftp://\fP url part if the domain starts with \fBftp.\fP: - \fBlinkchecker \-r0 ftp.example.com\fP -.br +.B linkchecker \-r0 ftp.example.com +.TP Generate a sitemap graph and convert it with the graphviz dot utility: - \fBlinkchecker \-odot \-v www.example.com | dot \-Tps > sitemap.ps\fP -. +.B linkchecker \-odot \-v www.example.com | dot \-Tps > sitemap.ps .SH OPTIONS .SS General options .TP @@ -99,7 +99,8 @@ Output to a file \fBlinkchecker\-out.\fP\fITYPE\fP, The \fIENCODING\fP specifies the output encoding, the default is that of your locale. Valid encodings are listed at -\fBhttp://docs.python.org/library/\:codecs.html#standard-encodings\fP. +.UR http://docs.python.org/library/codecs.html#standard-encodings +.UE . .br The \fIFILENAME\fP and \fIENCODING\fP parts of the \fBnone\fP output type will be ignored, else if the file already exists, it will be overwritten. @@ -126,7 +127,8 @@ below. .br The \fIENCODING\fP specifies the output encoding, the default is that of your locale. Valid encodings are listed at -\fBhttp://docs.python.org/library/\:codecs.html#standard-encodings\fP. +.UR http://docs.python.org/library/codecs.html#standard-encodings +.UE . .TP \fB\-q\fP, \fB\-\-quiet\fP Quiet operation, an alias for \fB\-o none\fP. @@ -203,7 +205,9 @@ version of LinkChecker. .SH "CONFIGURATION FILES" Configuration files can specify all options above.
They can also specify some options that cannot be set on the command line. -See \fBlinkcheckerrc\fP(5) for more info. +See +.BR linkcheckerrc (5) +for more info. .SH OUTPUT TYPES Note that by default only errors and warnings are logged. @@ -236,7 +240,8 @@ Log check result as machine-readable XML. .TP \fBsitemap\fP Log check result as an XML sitemap whose protocol is documented at -\fBhttp://www.sitemaps.org/protocol.html\fP. +.UR http://www.sitemaps.org/protocol.html +.UE . .TP \fBsql\fP Log check result as SQL script with INSERT commands. An example @@ -252,7 +257,10 @@ Logs nothing. Suitable for debugging or checking the exit code. . .SH REGULAR EXPRESSIONS LinkChecker accepts Python regular expressions. -See \fBhttp://docs.python.org/\:howto/regex.html\fP for an introduction. +See +.UR http://docs.python.org/howto/regex.html +.UE +for an introduction. An addition is that a leading exclamation mark negates the regular expression. @@ -276,15 +284,15 @@ Multiple entries are separated by a blank line. The example below will send two cookies to all URLs starting with \fBhttp://example.com/hello/\fP and one to all URLs starting with \fBhttps://example.org/\fP: - - Host: example.com - Path: /hello - Set-cookie: ID="smee" - Set-cookie: spam="egg" - - Host: example.org - Set-cookie: baggage="elitist"; comment="hologram" - +.EX + Host: example.com + Path: /hello + Set-cookie: ID="smee" + Set-cookie: spam="egg" +.PP + Host: example.org + Set-cookie: baggage="elitist"; comment="hologram" +.EE .SH PROXY SUPPORT To use a proxy on Unix or Windows set the $http_proxy, $https_proxy or $ftp_proxy environment variables to the proxy URL. The URL should be of the form @@ -292,29 +300,27 @@ environment variables to the proxy URL. The URL should be of the form LinkChecker also detects manual proxy settings of Internet Explorer under Windows systems, and gconf or KDE on Linux systems. On a Mac use the Internet Config to select a proxy. -. 
+.PP You can also set a comma-separated domain list in the $no_proxy environment variables to ignore any proxy settings for these domains. -. +.TP Setting a HTTP proxy on Unix for example looks like this: - - export http_proxy="http://proxy.example.com:8080" - +.B +export http_proxy="http://proxy.example.com:8080" +.TP Proxy authentication is also supported: - - export http_proxy="http://user1:mypass@proxy.example.org:8081" - +.B +export http_proxy="http://user1:mypass@proxy.example.org:8081" +.TP Setting a proxy on the Windows command prompt: - - set http_proxy=http://proxy.example.com:8080 - +.B +set http_proxy=http://proxy.example.com:8080 .SH PERFORMED CHECKS All URLs have to pass a preliminary syntax test. Minor quoting mistakes will issue a warning, all other invalid syntax issues are errors. After the syntax check passes, the URL is queued for connection checking. All connection check types are described below. -. .TP HTTP links (\fBhttp:\fP, \fBhttps:\fP) After connecting to the given HTTP server the given path @@ -322,75 +328,74 @@ or query is requested. All redirections are followed, and if user/password is given it will be used as authorization when necessary. All final HTTP status codes other than 2xx are errors. -. +.IP HTML page contents are checked for recursion. .TP Local files (\fBfile:\fP) A regular, readable file that can be opened is valid. A readable directory is also valid. All other files, for example device files, unreadable or non-existing files are errors. -. +.IP HTML or other parseable file contents are checked for recursion. .TP Mail links (\fBmailto:\fP) A mailto: link eventually resolves to a list of email addresses. If one address fails, the whole list will fail. For each mail address we check the following things: -. - 1) Check the adress syntax, both of the part before and after - the @ sign. - 2) Look up the MX DNS records. If we found no MX record, - print an error. - 3) Check if one of the mail hosts accept an SMTP connection. 
- Check hosts with higher priority first. - If no host accepts SMTP, we print a warning. - 4) Try to verify the address with the VRFY command. If we got - an answer, print the verified address as an info. +.br +1) Check the address syntax, both of the part before and after the @ sign. +.br +2) Look up the MX DNS records. If we found no MX record, print an error. +.br +3) Check if one of the mail hosts accepts an SMTP connection. +Check hosts with higher priority first. +If no host accepts SMTP, we print a warning. +.br +4) Try to verify the address with the VRFY command. If we got an answer, +print the verified address as an info. + .TP FTP links (\fBftp:\fP) - - For FTP links we do: - - 1) connect to the specified host - 2) try to login with the given user and password. The default - user is ``anonymous``, the default password is ``anonymous@``. - 3) try to change to the given directory - 4) list the file with the NLST command +For FTP links we do: +.br +1) connect to the specified host +.br +2) try to login with the given user and password. The default +user is ``anonymous``, the default password is ``anonymous@``. +.br +3) try to change to the given directory +.br +4) list the file with the NLST command .TP Telnet links (``telnet:``) - - We try to connect and if user/password are given, login to the - given telnet server. +We try to connect and if user/password are given, login to the +given telnet server. .TP NNTP links (``news:``, ``snews:``, ``nntp``) - - We try to connect to the given NNTP server. If a news group or - article is specified, try to request it from the server. +We try to connect to the given NNTP server. If a news group or +article is specified, try to request it from the server. .TP Unsupported links (``javascript:``, etc.) - - An unsupported link will only print a warning. No further checking - will be made. - - The complete list of recognized, but unsupported links can be found - in the \fBlinkcheck/checker/unknownurl.py\fP source file.
- The most prominent of them should be JavaScript links. - +An unsupported link will only print a warning. No further checking +will be made. +.IP +The complete list of recognized, but unsupported links can be found +in the \fBlinkcheck/checker/unknownurl.py\fP source file. +The most prominent of them should be JavaScript links. .SH PLUGINS There are two plugin types: connection and content plugins. -. Connection plugins are run after a successful connection to the URL host. -. Content plugins are run if the URL type has content (mailto: URLs have no content for example) and if the check is not forbidden (ie. by HTTP robots.txt). -. +.PP See \fBlinkchecker \-\-list\-plugins\fP for a list of plugins and -their documentation. All plugins are enabled via the \fBlinkcheckerrc\fP(5) +their documentation. All plugins are enabled via the +.BR linkcheckerrc (5) configuration file. .SH RECURSION @@ -455,11 +460,11 @@ same as the host of the user browsing your pages. . .SH RETURN VALUE The return value is 2 when -.IP \(bu +.IP \(bu 2 a program error occurred. .PP The return value is 1 when -.IP \(bu +.IP \(bu 2 invalid links were found or .IP \(bu link warnings were found and warnings are enabled @@ -478,12 +483,16 @@ might slow down the program or even the whole system. .br \fBlinkchecker\-out.\fP\fITYPE\fP - default logger file output name .br -\fBhttp://docs.python.org/library/codecs.html#standard-encodings\fP - valid output encodings +.UR http://docs.python.org/library/codecs.html#standard-encodings +.UE +\- valid output encodings .br -\fBhttp://docs.python.org/howto/regex.html\fP - regular expression documentation +.UR http://docs.python.org/howto/regex.html +.UE +\- regular expression documentation .SH "SEE ALSO" -\fBlinkcheckerrc\fP(5) +.BR linkcheckerrc (5) . 
.SH AUTHOR Bastian Kleineidam diff --git a/doc/en/linkcheckerrc.5 b/doc/en/linkcheckerrc.5 index b2095288..05e21afa 100644 --- a/doc/en/linkcheckerrc.5 +++ b/doc/en/linkcheckerrc.5 @@ -1,4 +1,4 @@ -.TH linkcheckerrc 5 2007-11-30 "LinkChecker" +.TH LINKCHECKERRC 5 2020-04-24 "LinkChecker" "LinkChecker User Manual" .SH NAME linkcheckerrc - configuration file for LinkChecker . @@ -13,7 +13,8 @@ The default file location is \fB~/.linkchecker/linkcheckerrc\fP on Unix, .TP \fBcookiefile=\fP\fIfilename\fP Read a file with initial cookie data. The cookie data -format is explained in linkchecker(1). +format is explained in +.BR linkchecker (1). .br Command line option: \fB\-\-cookiefile\fP .TP @@ -188,7 +189,8 @@ below. .br The \fIENCODING\fP specifies the output encoding, the default is that of your locale. Valid encodings are listed at -\fBhttp://docs.python.org/library/codecs.html#standard-encodings\fP. +.UR http://docs.python.org/library/codecs.html#standard-encodings +.UE . .br Command line option: \fB\-\-output\fP .TP @@ -228,7 +230,8 @@ Command line option: none .TP \fBencoding=\fP\fISTRING\fP Valid encodings are listed in -\fBhttp://docs.python.org/library/codecs.html#standard-encodings\fP. +.UR http://docs.python.org/library/codecs.html#standard-encodings +.UE . .br Default encoding is \fBiso\-8859\-15\fP. .TP @@ -404,42 +407,47 @@ priority for the first URL is 1.0, for all child URLs 0.5. How frequently pages are changing. . .SH "LOGGER PARTS" - \fBall\fP (for all parts) - \fBid\fP (a unique ID for each logentry) - \fBrealurl\fP (the full url link) - \fBresult\fP (valid or invalid, with messages) - \fBextern\fP (1 or 0, only in some logger types reported) - \fBbase\fP (base href=...) - \fBname\fP (name and name) - \fBparenturl\fP (if any) - \fBinfo\fP (some additional info, e.g. 
FTP welcome messages) - \fBwarning\fP (warnings) - \fBdltime\fP (download time) - \fBchecktime\fP (check time) - \fBurl\fP (the original url name, can be relative) - \fBintro\fP (the blurb at the beginning, "starting at ...") - \fBoutro\fP (the blurb at the end, "found x errors ...") +.TS +nokeep, tab(@); +ll. +\fBall\fP@(for all parts) +\fBid\fP@(a unique ID for each logentry) +\fBrealurl\fP@(the full url link) +\fBresult\fP@(valid or invalid, with messages) +\fBextern\fP@(1 or 0, only in some logger types reported) +\fBbase\fP@(base href=...) +\fBname\fP@(name and name) +\fBparenturl\fP@(if any) +\fBinfo\fP@(some additional info, e.g. FTP welcome messages) +\fBwarning\fP@(warnings) +\fBdltime\fP@(download time) +\fBchecktime\fP@(check time) +\fBurl\fP@(the original url name, can be relative) +\fBintro\fP@(the blurb at the beginning, "starting at ...") +\fBoutro\fP@(the blurb at the end, "found x errors ...") +.TE .SH MULTILINE Some option values can span multiple lines. Each line has to be indented for that to work. Lines starting with a hash (\fB#\fP) will be ignored, though they must still be indented. - - ignore= - lconline - bookmark - # a comment - ^mailto: -. +.EX +ignore= + lconline + bookmark + # a comment + ^mailto: +.EE .SH EXAMPLE - [output] - log=html - - [checking] - threads=5 - - [filtering] - ignorewarnings=http-moved-permanent - +.EX +[output] +log=html +.PP +[checking] +threads=5 +.PP +[filtering] +ignorewarnings=http-moved-permanent +.EE .SH PLUGINS All plugins have a separate section. If the section appears in the configuration file the plugin is enabled. @@ -475,7 +483,9 @@ Configures the expiration warning time in days. .SS \fB[HtmlSyntaxCheck]\fP Check the syntax of HTML pages with the online W3C HTML validator. -See http://validator.w3.org/docs/api.html. +See +.UR http://validator.w3.org/docs/api.html +.UE . .SS \fB[HttpHeaderInfo]\fP Print HTTP headers in URL info. @@ -486,7 +496,9 @@ to display all HTTP headers that start with "X-". 
.SS \fB[CssSyntaxCheck]\fP Check the syntax of HTML pages with the online W3C CSS validator. -See http://jigsaw.w3.org/css-validator/manual.html#expert. +See +.UR http://jigsaw.w3.org/css-validator/manual.html#expert +.UE . .SS \fB[VirusCheck]\fP Checks the page content for virus infections with clamav. @@ -551,7 +563,7 @@ The IP is obfuscated. The URL contains leading or trailing whitespace. .SH "SEE ALSO" -linkchecker(1) +.BR linkchecker (1) . .SH AUTHOR Bastian Kleineidam diff --git a/doc/web/media/man1/linkchecker.1.html b/doc/web/media/man1/linkchecker.1.html index 9344feab..122473e6 100644 --- a/doc/web/media/man1/linkchecker.1.html +++ b/doc/web/media/man1/linkchecker.1.html @@ -20,7 +20,7 @@ - +
LINKCHECKER(1)LinkChecker commandline usageLinkChecker User Manual LINKCHECKER(1)
@@ -36,7 +36,10 @@ linkchecker - command line client to check HTML documents and websites for

-LinkChecker features +
+
LinkChecker features
+
+
  • recursive and multithreaded checking,
  • output in colored or normal text, HTML, SQL, CSV, XML or a sitemap graph @@ -56,30 +59,30 @@ LinkChecker features

-The most common use checks the given domain recursively: - linkchecker http://www.example.com/ -
-Beware that this checks the whole site which can have thousands of URLs. Use the - -r option to restrict the recursion depth. -
-Don't check URLs with /secret in its name. All other links are checked as - usual: - linkchecker --ignore-url=/secret mysite.example.com -
-Checking a local HTML file on Unix: - linkchecker ../bla.html -
-Checking a local HTML file on Windows: - linkchecker c:\temp\test.html -
-You can skip the http:// url part if the domain starts with www.: - linkchecker www.example.com -
-You can skip the ftp:// url part if the domain starts with ftp.: - linkchecker -r0 ftp.example.com -
-Generate a sitemap graph and convert it with the graphviz dot utility: - linkchecker -odot -v www.example.com | dot -Tps > sitemap.ps +
+
The most common use checks the given domain recursively:
+
linkchecker http://www.example.com/ +
+ Beware that this checks the whole site which can have thousands of URLs. Use + the -r option to restrict the recursion depth.
+
Don't check URLs with /secret in its name. All other links are + checked as usual:
+
linkchecker --ignore-url=/secret mysite.example.com
+
Checking a local HTML file on Unix:
+
linkchecker ../bla.html
+
Checking a local HTML file on Windows:
+
linkchecker c:\temp\test.html
+
You can skip the http:// url part if the domain starts with + www.:
+
linkchecker www.example.com
+
You can skip the ftp:// url part if the domain starts with + ftp.:
+
linkchecker -r0 ftp.example.com
+
Generate a sitemap graph and convert it with the graphviz dot + utility:
+
linkchecker -odot -v www.example.com | dot -Tps > + sitemap.ps
+

@@ -120,7 +123,8 @@ Generate a sitemap graph and convert it with the graphviz dot utility: $HOME/.linkchecker/blacklist for blacklist output, or FILENAME if specified. The ENCODING specifies the output encoding, the default is that of your locale. Valid encodings are listed - at http://docs.python.org/library/codecs.html#standard-encodings. + at + http://docs.python.org/library/codecs.html#standard-encodings.
The FILENAME and ENCODING parts of the none output type will be ignored, else if the file already exists, it will be overwritten. @@ -142,7 +146,7 @@ Generate a sitemap graph and convert it with the graphviz dot utility:
The ENCODING specifies the output encoding, the default is that of your locale. Valid encodings are listed at - http://docs.python.org/library/codecs.html#standard-encodings. + http://docs.python.org/library/codecs.html#standard-encodings.
-q, --quiet
Quiet operation, an alias for -o none. This is only useful with -F.
@@ -247,7 +251,7 @@ Note that by default only errors and warnings are logged. You should use the
Log check result as machine-readable XML.
sitemap
Log check result as an XML sitemap whose protocol is documented at - http://www.sitemaps.org/protocol.html.
+ http://www.sitemaps.org/protocol.html.
sql
Log check result as SQL script with INSERT commands. An example script to create the initial SQL table is included as create.sql.
@@ -263,7 +267,8 @@ Note that by default only errors and warnings are logged. You should use the

LinkChecker accepts Python regular expressions. See - http://docs.python.org/howto/regex.html for an introduction. + http://docs.python.org/howto/regex.html + for an introduction.

An addition is that a leading exclamation mark negates the regular expression.

@@ -284,15 +289,16 @@ A cookie file contains standard HTTP header (RFC 2616) data with the following will send two cookies to all URLs starting with http://example.com/hello/ and one to all URLs starting with https://example.org/:

-

- Host: example.com - Path: /hello - Set-cookie: ID="smee" - Set-cookie: spam="egg"

-

- Host: example.org - Set-cookie: baggage="elitist"; comment="hologram"

-

+
+  Host: example.com
+  Path: /hello
+  Set-cookie: ID="smee"
+  Set-cookie: spam="egg"
+
+
+  Host: example.org
+  Set-cookie: baggage="elitist"; comment="hologram"
+

linkcheckerrc(5) configuration file. + the check is not forbidden (ie. by HTTP robots.txt). +

See linkchecker --list-plugins for a list of plugins and + their documentation. All plugins are enabled via the linkcheckerrc(5) + configuration file.

@@ -480,11 +495,11 @@ LinkChecker consumes memory for each queued URL to check. With thousands of
linkchecker-out.TYPE - default logger file output name
-http://docs.python.org/library/codecs.html#standard-encodings - valid - output encodings +http://docs.python.org/library/codecs.html#standard-encodings + - valid output encodings
-http://docs.python.org/howto/regex.html - regular expression - documentation +http://docs.python.org/howto/regex.html + - regular expression documentation

@@ -503,7 +518,7 @@ Copyright © 2000-2014 Bastian Kleineidam - +
2010-07-012020-04-24 LinkChecker
diff --git a/doc/web/media/man5/linkcheckerrc.5.html b/doc/web/media/man5/linkcheckerrc.5.html index 1841f978..cb6e29bb 100644 --- a/doc/web/media/man5/linkcheckerrc.5.html +++ b/doc/web/media/man5/linkcheckerrc.5.html @@ -14,14 +14,14 @@ code.Nm, code.Fl, code.Cm, code.Ic, code.In, code.Fd, code.Fn, code.Cd { font-weight: bold; font-family: inherit; } - linkcheckerrc(5) + LINKCHECKERRC(5) - - - + + +
linkcheckerrc(5)File Formats Manuallinkcheckerrc(5)LINKCHECKERRC(5)LinkChecker User ManualLINKCHECKERRC(5)
@@ -44,7 +44,7 @@ The default file location is ~/.linkchecker/linkcheckerrc on Unix,
cookiefile=filename
Read a file with initial cookie data. The cookie data format is explained - in linkchecker(1). + in linkchecker(1).
Command line option: --cookiefile
localwebroot=STRING
@@ -201,7 +201,7 @@ The default file location is ~/.linkchecker/linkcheckerrc on Unix,
The ENCODING specifies the output encoding, the default is that of your locale. Valid encodings are listed at - http://docs.python.org/library/codecs.html#standard-encodings. + http://docs.python.org/library/codecs.html#standard-encodings.
Command line option: --output
quiet=[0|1]
@@ -239,7 +239,7 @@ The default file location is ~/.linkchecker/linkcheckerrc on Unix, Command line option: none
encoding=STRING
Valid encodings are listed in - http://docs.python.org/library/codecs.html#standard-encodings. + http://docs.python.org/library/codecs.html#standard-encodings.
Default encoding is iso-8859-15.
color*
@@ -405,46 +405,97 @@ The default file location is ~/.linkchecker/linkcheckerrc on Unix,

- all (for all parts) - id (a unique ID for each logentry) - realurl (the full url link) - result (valid or invalid, with messages) - extern (1 or 0, only in some logger types reported) - base (base href=...) - name (<a href=...>name</a> and <img - alt="name">) - parenturl (if any) - info (some additional info, e.g. FTP welcome messages) - warning (warnings) - dltime (download time) - checktime (check time) - url (the original url name, can be relative) - intro (the blurb at the beginning, "starting at ...") - outro (the blurb at the end, "found x errors ...") + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + +
all(for all parts)
id(a unique ID for each logentry)
realurl(the full url link)
result(valid or invalid, with messages)
extern(1 or 0, only in some logger types reported)
base(base href=...)
name(<a href=...>name</a> and <img + alt="name">)
parenturl(if any)
info(some additional info, e.g. FTP welcome messages)
warning(warnings)
dltime(download time)
checktime(check time)
url(the original url name, can be relative)
intro(the blurb at the beginning, "starting at ...")
outro(the blurb at the end, "found x errors ...")

Some option values can span multiple lines. Each line has to be indented for that to work. Lines starting with a hash (#) will be ignored, though they must still be indented. -

- ignore= - lconline - bookmark - # a comment - ^mailto:

+
+ignore=
+  lconline
+  bookmark
+  # a comment
+  ^mailto:
+

- [output] - log=html -

- [checking] - threads=5

-

- [filtering] - ignorewarnings=http-moved-permanent

-

+
+[output]
+log=html
+
+
+[checking]
+threads=5
+
+
+[filtering]
+ignorewarnings=http-moved-permanent
+

@@ -493,7 +544,7 @@ Check SSL certificate expiration date. Only internal https: links will be

Check the syntax of HTML pages with the online W3C HTML validator. See - http://validator.w3.org/docs/api.html. + http://validator.w3.org/docs/api.html.

@@ -510,7 +561,7 @@ Print HTTP headers in URL info.

Check the syntax of HTML pages with the online W3C CSS validator. See - http://jigsaw.w3.org/css-validator/manual.html#expert. + http://jigsaw.w3.org/css-validator/manual.html#expert.

@@ -575,7 +626,7 @@ The following warnings are recognized in the 'ignorewarnings' config file entry:

-linkchecker(1) +linkchecker(1)

@@ -588,7 +639,7 @@ Copyright © 2000-2014 Bastian Kleineidam
- +
2007-11-302020-04-24 LinkChecker
From e3b77f810ef547809aced97aca40410a005d76fe Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Fri, 24 Apr 2020 19:46:30 +0100 Subject: [PATCH 3/6] Update external links in man pages to https --- doc/en/linkchecker.1 | 12 ++++++------ doc/en/linkcheckerrc.5 | 8 ++++---- doc/web/media/man1/linkchecker.1.html | 12 ++++++------ doc/web/media/man5/linkcheckerrc.5.html | 8 ++++---- 4 files changed, 20 insertions(+), 20 deletions(-) diff --git a/doc/en/linkchecker.1 b/doc/en/linkchecker.1 index 9b1c7798..56384e9e 100644 --- a/doc/en/linkchecker.1 +++ b/doc/en/linkchecker.1 @@ -99,7 +99,7 @@ Output to a file \fBlinkchecker\-out.\fP\fITYPE\fP, The \fIENCODING\fP specifies the output encoding, the default is that of your locale. Valid encodings are listed at -.UR http://docs.python.org/library/codecs.html#standard-encodings +.UR https://docs.python.org/library/codecs.html#standard-encodings .UE . .br The \fIFILENAME\fP and \fIENCODING\fP parts of the \fBnone\fP output type @@ -127,7 +127,7 @@ below. .br The \fIENCODING\fP specifies the output encoding, the default is that of your locale. Valid encodings are listed at -.UR http://docs.python.org/library/codecs.html#standard-encodings +.UR https://docs.python.org/library/codecs.html#standard-encodings .UE . .TP \fB\-q\fP, \fB\-\-quiet\fP @@ -240,7 +240,7 @@ Log check result as machine-readable XML. .TP \fBsitemap\fP Log check result as an XML sitemap whose protocol is documented at -.UR http://www.sitemaps.org/protocol.html +.UR https://www.sitemaps.org/protocol.html .UE . .TP \fBsql\fP @@ -258,7 +258,7 @@ Logs nothing. Suitable for debugging or checking the exit code. .SH REGULAR EXPRESSIONS LinkChecker accepts Python regular expressions. See -.UR http://docs.python.org/howto/regex.html +.UR https://docs.python.org/howto/regex.html .UE for an introduction. @@ -483,11 +483,11 @@ might slow down the program or even the whole system. 
.br \fBlinkchecker\-out.\fP\fITYPE\fP - default logger file output name .br -.UR http://docs.python.org/library/codecs.html#standard-encodings +.UR https://docs.python.org/library/codecs.html#standard-encodings .UE \- valid output encodings .br -.UR http://docs.python.org/howto/regex.html +.UR https://docs.python.org/howto/regex.html .UE \- regular expression documentation diff --git a/doc/en/linkcheckerrc.5 b/doc/en/linkcheckerrc.5 index 05e21afa..49526789 100644 --- a/doc/en/linkcheckerrc.5 +++ b/doc/en/linkcheckerrc.5 @@ -189,7 +189,7 @@ below. .br The \fIENCODING\fP specifies the output encoding, the default is that of your locale. Valid encodings are listed at -.UR http://docs.python.org/library/codecs.html#standard-encodings +.UR https://docs.python.org/library/codecs.html#standard-encodings .UE . .br Command line option: \fB\-\-output\fP @@ -230,7 +230,7 @@ Command line option: none .TP \fBencoding=\fP\fISTRING\fP Valid encodings are listed in -.UR http://docs.python.org/library/codecs.html#standard-encodings +.UR https://docs.python.org/library/codecs.html#standard-encodings .UE . .br Default encoding is \fBiso\-8859\-15\fP. @@ -484,7 +484,7 @@ Configures the expiration warning time in days. .SS \fB[HtmlSyntaxCheck]\fP Check the syntax of HTML pages with the online W3C HTML validator. See -.UR http://validator.w3.org/docs/api.html +.UR https://validator.w3.org/docs/api.html .UE . .SS \fB[HttpHeaderInfo]\fP @@ -497,7 +497,7 @@ to display all HTTP headers that start with "X-". .SS \fB[CssSyntaxCheck]\fP Check the syntax of HTML pages with the online W3C CSS validator. See -.UR http://jigsaw.w3.org/css-validator/manual.html#expert +.UR https://jigsaw.w3.org/css-validator/manual.html#expert .UE . 
.SS \fB[VirusCheck]\fP diff --git a/doc/web/media/man1/linkchecker.1.html b/doc/web/media/man1/linkchecker.1.html index 122473e6..4dda688a 100644 --- a/doc/web/media/man1/linkchecker.1.html +++ b/doc/web/media/man1/linkchecker.1.html @@ -124,7 +124,7 @@ linkchecker - command line client to check HTML documents and websites for FILENAME if specified. The ENCODING specifies the output encoding, the default is that of your locale. Valid encodings are listed at - http://docs.python.org/library/codecs.html#standard-encodings. + https://docs.python.org/library/codecs.html#standard-encodings.
The FILENAME and ENCODING parts of the none output type will be ignored, else if the file already exists, it will be overwritten. @@ -146,7 +146,7 @@ linkchecker - command line client to check HTML documents and websites for
The ENCODING specifies the output encoding, the default is that of your locale. Valid encodings are listed at - http://docs.python.org/library/codecs.html#standard-encodings. + https://docs.python.org/library/codecs.html#standard-encodings.
-q, --quiet
Quiet operation, an alias for -o none. This is only useful with -F.
@@ -251,7 +251,7 @@ Note that by default only errors and warnings are logged. You should use the
Log check result as machine-readable XML.
sitemap
Log check result as an XML sitemap whose protocol is documented at - http://www.sitemaps.org/protocol.html.
+ https://www.sitemaps.org/protocol.html.
sql
Log check result as SQL script with INSERT commands. An example script to create the initial SQL table is included as create.sql.
@@ -267,7 +267,7 @@ Note that by default only errors and warnings are logged. You should use the

LinkChecker accepts Python regular expressions. See - http://docs.python.org/howto/regex.html + https://docs.python.org/howto/regex.html for an introduction.

An addition is that a leading exclamation mark negates the regular expression.

@@ -495,10 +495,10 @@ LinkChecker consumes memory for each queued URL to check. With thousands of
linkchecker-out.TYPE - default logger file output name
-http://docs.python.org/library/codecs.html#standard-encodings +https://docs.python.org/library/codecs.html#standard-encodings - valid output encodings
-http://docs.python.org/howto/regex.html +https://docs.python.org/howto/regex.html - regular expression documentation

diff --git a/doc/web/media/man5/linkcheckerrc.5.html b/doc/web/media/man5/linkcheckerrc.5.html index cb6e29bb..7debade5 100644 --- a/doc/web/media/man5/linkcheckerrc.5.html +++ b/doc/web/media/man5/linkcheckerrc.5.html @@ -201,7 +201,7 @@ The default file location is ~/.linkchecker/linkcheckerrc on Unix,
The ENCODING specifies the output encoding, the default is that of your locale. Valid encodings are listed at - http://docs.python.org/library/codecs.html#standard-encodings. + https://docs.python.org/library/codecs.html#standard-encodings.
Command line option: --output
quiet=[0|1]
@@ -239,7 +239,7 @@ The default file location is ~/.linkchecker/linkcheckerrc on Unix, Command line option: none
encoding=STRING
Valid encodings are listed in - http://docs.python.org/library/codecs.html#standard-encodings. + https://docs.python.org/library/codecs.html#standard-encodings.
Default encoding is iso-8859-15.
color*
@@ -544,7 +544,7 @@ Check SSL certificate expiration date. Only internal https: links will be

Check the syntax of HTML pages with the online W3C HTML validator. See - http://validator.w3.org/docs/api.html. + https://validator.w3.org/docs/api.html.

@@ -561,7 +561,7 @@ Print HTTP headers in URL info.

Check the syntax of HTML pages with the online W3C CSS validator. See - http://jigsaw.w3.org/css-validator/manual.html#expert. + https://jigsaw.w3.org/css-validator/manual.html#expert.

From a506800c07b9c460ca8d1c9f20ceaf50b79a294d Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Fri, 24 Apr 2020 19:46:30 +0100 Subject: [PATCH 4/6] Replace `` in man page with bold formatting --- doc/en/linkchecker.1 | 8 ++++---- doc/web/media/man1/linkchecker.1.html | 8 ++++---- 2 files changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/en/linkchecker.1 b/doc/en/linkchecker.1 index 56384e9e..230e507e 100644 --- a/doc/en/linkchecker.1 +++ b/doc/en/linkchecker.1 @@ -361,24 +361,24 @@ For FTP links we do: 1) connect to the specified host .br 2) try to login with the given user and password. The default -user is ``anonymous``, the default password is ``anonymous@``. +user is \fBanonymous\fP, the default password is \fBanonymous@\fP. .br 3) try to change to the given directory .br 4) list the file with the NLST command .TP -Telnet links (``telnet:``) +Telnet links (\fBtelnet:\fP) We try to connect and if user/password are given, login to the given telnet server. .TP -NNTP links (``news:``, ``snews:``, ``nntp``) +NNTP links (\fBnews:\fP, \fBsnews:\fP, \fBnntp\fP) We try to connect to the given NNTP server. If a news group or article is specified, try to request it from the server. .TP -Unsupported links (``javascript:``, etc.) +Unsupported links (\fBjavascript:\fP, etc.) An unsupported link will only print a warning. No further checking will be made. .IP diff --git a/doc/web/media/man1/linkchecker.1.html b/doc/web/media/man1/linkchecker.1.html index 4dda688a..fa3d47c2 100644 --- a/doc/web/media/man1/linkchecker.1.html +++ b/doc/web/media/man1/linkchecker.1.html @@ -373,24 +373,24 @@ All URLs have to pass a preliminary syntax test. Minor quoting mistakes will 1) connect to the specified host
2) try to login with the given user and password. The default user is - ``anonymous``, the default password is ``anonymous@``. + anonymous, the default password is anonymous@.
3) try to change to the given directory
4) list the file with the NLST command

-
Telnet links (``telnet:``)
+
Telnet links (telnet:)
We try to connect and if user/password are given, login to the given telnet server.

-
NNTP links (``news:``, ``snews:``, ``nntp``)
+
NNTP links (news:, snews:, nntp)
We try to connect to the given NNTP server. If a news group or article is specified, try to request it from the server.

-
Unsupported links (``javascript:``, etc.)
+
Unsupported links (javascript:, etc.)
An unsupported link will only print a warning. No further checking will be made.
From 5dd448cf05ef01156488243b1e6367264b0893c1 Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Fri, 24 Apr 2020 19:46:30 +0100 Subject: [PATCH 5/6] Add link to unknownurl.py in man page --- doc/en/linkchecker.1 | 6 +++++- doc/web/media/man1/linkchecker.1.html | 4 ++-- 2 files changed, 7 insertions(+), 3 deletions(-) diff --git a/doc/en/linkchecker.1 b/doc/en/linkchecker.1 index 230e507e..d6d8902b 100644 --- a/doc/en/linkchecker.1 +++ b/doc/en/linkchecker.1 @@ -383,7 +383,11 @@ An unsupported link will only print a warning. No further checking will be made. .IP The complete list of recognized, but unsupported links can be found -in the \fBlinkcheck/checker/unknownurl.py\fP source file. +in the +.UR https://github.com/linkchecker/linkchecker/blob/master/linkcheck/checker/unknownurl.py +linkcheck/checker/unknownurl.py +.UE +source file. The most prominent of them should be JavaScript links. .SH PLUGINS There are two plugin types: connection and content plugins. diff --git a/doc/web/media/man1/linkchecker.1.html b/doc/web/media/man1/linkchecker.1.html index fa3d47c2..aa56d9b4 100644 --- a/doc/web/media/man1/linkchecker.1.html +++ b/doc/web/media/man1/linkchecker.1.html @@ -397,8 +397,8 @@ All URLs have to pass a preliminary syntax test. Minor quoting mistakes will
The complete list of recognized, but unsupported links can be found in the - linkcheck/checker/unknownurl.py source file. The most prominent of - them should be JavaScript links.
+ linkcheck/checker/unknownurl.py + source file. The most prominent of them should be JavaScript links.
From b7c8ad9be714988ecf7e3e0624efc0ef5080e5a5 Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Fri, 24 Apr 2020 19:46:30 +0100 Subject: [PATCH 6/6] Fix typo for -Dplugin in man page --- doc/en/linkchecker.1 | 2 +- doc/web/media/man1/linkchecker.1.html | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/doc/en/linkchecker.1 b/doc/en/linkchecker.1 index d6d8902b..3a29e5bf 100644 --- a/doc/en/linkchecker.1 +++ b/doc/en/linkchecker.1 @@ -85,7 +85,7 @@ Print available check plugins and exit. \fB\-D\fP\fISTRING\fP, \fB\-\-debug=\fP\fISTRING\fP Print debugging output for the given logger. Available loggers are \fBcmdline\fP, \fBchecking\fP, -\fBcache\fP, \fBdns\fP, \fBplugins\fP and \fBall\fP. +\fBcache\fP, \fBdns\fP, \fBplugin\fP and \fBall\fP. Specifying \fBall\fP is an alias for specifying all available loggers. The option can be given multiple times to debug with more than one logger. diff --git a/doc/web/media/man1/linkchecker.1.html b/doc/web/media/man1/linkchecker.1.html index aa56d9b4..00dd851e 100644 --- a/doc/web/media/man1/linkchecker.1.html +++ b/doc/web/media/man1/linkchecker.1.html @@ -112,7 +112,7 @@ linkchecker - command line client to check HTML documents and websites for
-DSTRING, --debug=STRING
Print debugging output for the given logger. Available loggers are - cmdline, checking, cache, dns, plugins + cmdline, checking, cache, dns, plugin and all. Specifying all is an alias for specifying all available loggers. The option can be given multiple times to debug with more than one logger. For accurate results, threading will be disabled