diff --git a/config/linkcheckerrc b/config/linkcheckerrc
index 83e11fe0..767556c6 100644
--- a/config/linkcheckerrc
+++ b/config/linkcheckerrc
@@ -212,15 +212,17 @@
# ^https?://www\.example\.com/~calvin/ calvin mypass
# ^ftp://www\.example\.com/secret/ calvin
-# if the website requires a login the URL and optionally the user and
-# password CGI fieldnames can be provided.
+# if the website requires a login via a page with an HTML form the URL of the
+# page and optionally the username and password input element name attributes
+# can be provided.
#loginurl=http://www.example.com/
-# The name of the user and password CGI field
+# The name attributes of the username and password HTML input elements
#loginuserfield=login
#loginpasswordfield=password
-# Optionally any additional CGI name/value pairs. Note that the default
-# values are submitted automatically.
+# Optionally the name attributes of any additional input elements and the values
+# to populate them with. Note that these are submitted without checking
+# whether matching input elements exist in the HTML form.
#loginextrafields=
# name1:value1
# name 2:value 2
diff --git a/doc/en/linkcheckerrc.5 b/doc/en/linkcheckerrc.5
index 49526789..eee74d1d 100644
--- a/doc/en/linkcheckerrc.5
+++ b/doc/en/linkcheckerrc.5
@@ -1,4 +1,4 @@
-.TH LINKCHECKERRC 5 2020-04-24 "LinkChecker" "LinkChecker User Manual"
+.TH LINKCHECKERRC 5 2020-06-05 "LinkChecker" "LinkChecker User Manual"
.SH NAME
linkcheckerrc - configuration file for LinkChecker
.
@@ -129,35 +129,40 @@ Command line option: \fB\-\-checkextern\fP
.SS \fB[authentication]\fP
.TP
\fBentry=\fP\fIREGEX\fP \fIUSER\fP [\fIPASS\fP] (MULTILINE)
-Provide different user/password pairs for different link types.
-Entries are a triple (URL regex, username, password)
-or a tuple (URL regex, username), where the entries are
-separated by whitespace.
+Provide individual username/password pairs for different links. In addition to a
+single login page specified with \fBloginurl\fP multiple FTP,
+HTTP (Basic Authentication) and telnet links are supported. Entries are a
+triple (URL regex, username, password) or a tuple (URL regex, username),
+where the entries are separated by whitespace.
.br
The password is optional and if missing it has to be entered at the
commandline.
.br
-If the regular expression matches the checked URL, the given user/password
-pair is used for authentication. The commandline options
+If the regular expression matches the checked URL, the given username/password
+pair is used for authentication. The command line options
\fB\-u\fP and \fB\-p\fP match every link and therefore override the entries
-given here. The first match wins. At the moment, authentication is
-used/needed for http[s] and ftp links.
+given here. The first match wins.
.br
Command line option: \fB\-u\fP, \fB\-p\fP
.TP
\fBloginurl=\fP\fIURL\fP
-A login URL to be visited before checking. Also needs authentication
-data set for it.
+The URL of a login page to be visited before link checking. The page is expected
+to contain an HTML form to collect credentials and submit them to the address in
+its action attribute using an HTTP POST request.
+The name attributes of the input elements of the form and the values to be
+submitted need to be available (see \fBentry\fP for an explanation of username
+and password values).
.TP
\fBloginuserfield=\fP\fISTRING\fP
-The name of the user CGI field. Default name is \fBlogin\fP.
+The name attribute of the username input element. Default: \fBlogin\fP.
.TP
\fBloginpasswordfield=\fP\fISTRING\fP
-The name of the password CGI field. Default name is \fBpassword\fP.
+The name attribute of the password input element. Default: \fBpassword\fP.
.TP
\fBloginextrafields=\fP\fINAME\fP\fB:\fP\fIVALUE\fP (MULTILINE)
-Optionally any additional CGI name/value pairs. Note that the default
-values are submitted automatically.
+Optionally the name attributes of any additional input elements and the values
+to populate them with. Note that these are submitted without
+checking whether matching input elements exist in the HTML form.
.SS \fB[output]\fP
.TP
\fBdebug=\fP\fISTRING\fP[\fB,\fP\fISTRING\fP...]
diff --git a/doc/web/media/man5/linkcheckerrc.5.html b/doc/web/media/man5/linkcheckerrc.5.html
index 7debade5..72de8e1e 100644
--- a/doc/web/media/man5/linkcheckerrc.5.html
+++ b/doc/web/media/man5/linkcheckerrc.5.html
@@ -146,30 +146,38 @@ The default file location is ~/.linkchecker/linkcheckerrc on Unix,
- entry=REGEX USER [PASS] (MULTILINE)
- - Provide different user/password pairs for different link types. Entries
- are a triple (URL regex, username, password) or a tuple (URL regex,
- username), where the entries are separated by whitespace.
+
- Provide individual username/password pairs for different links. In addition
+ to a single login page specified with loginurl multiple FTP, HTTP
+ (Basic Authentication) and telnet links are supported. Entries are a
+ triple (URL regex, username, password) or a tuple (URL regex, username),
+ where the entries are separated by whitespace.
The password is optional and if missing it has to be entered at the
commandline.
- If the regular expression matches the checked URL, the given user/password
- pair is used for authentication. The commandline options -u and
- -p match every link and therefore override the entries given here.
- The first match wins. At the moment, authentication is used/needed for
- http[s] and ftp links.
+ If the regular expression matches the checked URL, the given
+ username/password pair is used for authentication. The command line
+ options -u and -p match every link and therefore override
+ the entries given here. The first match wins.
Command line option: -u, -p
- loginurl=URL
- - A login URL to be visited before checking. Also needs authentication data
- set for it.
+ - The URL of a login page to be visited before link checking. The page is
+ expected to contain an HTML form to collect credentials and submit them to
+ the address in its action attribute using an HTTP POST request. The name
+ attributes of the input elements of the form and the values to be
+ submitted need to be available (see entry for an explanation of
+ username and password values).
- loginuserfield=STRING
- - The name of the user CGI field. Default name is login.
+ - The name attribute of the username input element. Default:
+ login.
- loginpasswordfield=STRING
- - The name of the password CGI field. Default name is password.
+ - The name attribute of the password input element. Default:
+ password.
- loginextrafields=NAME:VALUE (MULTILINE)
- - Optionally any additional CGI name/value pairs. Note that the default
- values are submitted automatically.
+ - Optionally the name attributes of any additional input elements and the
+ values to populate them with. Note that these are submitted without
+ checking whether matching input elements exist in the HTML form.
@@ -639,7 +647,7 @@ Copyright © 2000-2014 Bastian Kleineidam
diff --git a/linkcheck/configuration/__init__.py b/linkcheck/configuration/__init__.py
index e4ffd424..b9fffc22 100644
--- a/linkcheck/configuration/__init__.py
+++ b/linkcheck/configuration/__init__.py
@@ -302,12 +302,6 @@ class Configuration(dict):
"""Make login configuration consistent."""
url = self["loginurl"]
disable = False
- if not self["loginpasswordfield"]:
- log.warn(LOG_CHECK, _("no CGI password fieldname given for login URL."))
- disable = True
- if not self["loginuserfield"]:
- log.warn(LOG_CHECK, _("no CGI user fieldname given for login URL."))
- disable = True
if self.get_user_password(url) == (None, None):
log.warn(
LOG_CHECK,
diff --git a/linkcheck/director/__init__.py b/linkcheck/director/__init__.py
index e92e3cc1..342d7652 100644
--- a/linkcheck/director/__init__.py
+++ b/linkcheck/director/__init__.py
@@ -19,7 +19,7 @@ Management of checking a queue of links with several threads.
import os
import time
-from .. import log, LOG_CHECK, LinkCheckerInterrupt, plugins
+from .. import log, LOG_CHECK, LinkCheckerError, LinkCheckerInterrupt, plugins
from ..cache import urlqueue, robots_txt, results
from . import aggregator, console
@@ -31,6 +31,9 @@ def check_urls(aggregate):
"""
try:
aggregate.visit_loginurl()
+ except LinkCheckerError as msg:
+ log.warn(LOG_CHECK, _("Problem using login URL: %(msg)s.") % dict(msg=msg))
+ return
except Exception as msg:
log.warn(LOG_CHECK, _("Error using login URL: %(msg)s.") % dict(msg=msg))
raise
diff --git a/linkcheck/htmlutil/loginformsearch.py b/linkcheck/htmlutil/loginformsearch.py
index b4db4b67..77103414 100644
--- a/linkcheck/htmlutil/loginformsearch.py
+++ b/linkcheck/htmlutil/loginformsearch.py
@@ -38,8 +38,9 @@ class Form:
def search_form(content, cgiuser, cgipassword):
- """Search for a HTML form in the given HTML content that has the given
- CGI fields. If no form is found return None.
+ """Search for a HTML form in the given HTML content that has input elements
+ with name attributes that match cgiuser and/or cgipassword. If no such form
+ is found return None.
"""
soup = htmlsoup.make_soup(content)
cginames = {cgiuser, cgipassword} - {None}