diff --git a/config/linkcheckerrc b/config/linkcheckerrc index 83e11fe0..767556c6 100644 --- a/config/linkcheckerrc +++ b/config/linkcheckerrc @@ -212,15 +212,17 @@ # ^https?://www\.example\.com/~calvin/ calvin mypass # ^ftp://www\.example\.com/secret/ calvin -# if the website requires a login the URL and optionally the user and -# password CGI fieldnames can be provided. +# if the website requires a login via a page with an HTML form the URL of the +# page and optionally the username and password input element name attributes +# can be provided. #loginurl=http://www.example.com/ -# The name of the user and password CGI field +# The name attributes of the username and password HTML input elements #loginuserfield=login #loginpasswordfield=password -# Optionally any additional CGI name/value pairs. Note that the default -# values are submitted automatically. +# Optionally the name attributes of any additional input elements and the values +# to populate them with. Note that these are submitted without checking +# whether matching input elements exist in the HTML form. #loginextrafields= # name1:value1 # name 2:value 2 diff --git a/doc/en/linkcheckerrc.5 b/doc/en/linkcheckerrc.5 index 49526789..eee74d1d 100644 --- a/doc/en/linkcheckerrc.5 +++ b/doc/en/linkcheckerrc.5 @@ -1,4 +1,4 @@ -.TH LINKCHECKERRC 5 2020-04-24 "LinkChecker" "LinkChecker User Manual" +.TH LINKCHECKERRC 5 2020-06-05 "LinkChecker" "LinkChecker User Manual" .SH NAME linkcheckerrc - configuration file for LinkChecker . @@ -129,35 +129,40 @@ Command line option: \fB\-\-checkextern\fP .SS \fB[authentication]\fP .TP \fBentry=\fP\fIREGEX\fP \fIUSER\fP [\fIPASS\fP] (MULTILINE) -Provide different user/password pairs for different link types. -Entries are a triple (URL regex, username, password) -or a tuple (URL regex, username), where the entries are -separated by whitespace. +Provide individual username/password pairs for different links. 
In addition to a +single login page specified with \fBloginurl\fP multiple FTP, +HTTP (Basic Authentication) and telnet links are supported. Entries are a +triple (URL regex, username, password) or a tuple (URL regex, username), +where the entries are separated by whitespace. .br The password is optional and if missing it has to be entered at the commandline. .br -If the regular expression matches the checked URL, the given user/password -pair is used for authentication. The commandline options +If the regular expression matches the checked URL, the given username/password +pair is used for authentication. The command line options \fB\-u\fP and \fB\-p\fP match every link and therefore override the entries -given here. The first match wins. At the moment, authentication is -used/needed for http[s] and ftp links. +given here. The first match wins. .br Command line option: \fB\-u\fP, \fB\-p\fP .TP \fBloginurl=\fP\fIURL\fP -A login URL to be visited before checking. Also needs authentication -data set for it. +The URL of a login page to be visited before link checking. The page is expected +to contain an HTML form to collect credentials and submit them to the address in +its action attribute using an HTTP POST request. +The name attributes of the input elements of the form and the values to be +submitted need to be available (see \fBentry\fP for an explanation of username +and password values). .TP \fBloginuserfield=\fP\fISTRING\fP -The name of the user CGI field. Default name is \fBlogin\fP. +The name attribute of the username input element. Default: \fBlogin\fP. .TP \fBloginpasswordfield=\fP\fISTRING\fP -The name of the password CGI field. Default name is \fBpassword\fP. +The name attribute of the password input element. Default: \fBpassword\fP. .TP \fBloginextrafields=\fP\fINAME\fP\fB:\fP\fIVALUE\fP (MULTILINE) -Optionally any additional CGI name/value pairs. Note that the default -values are submitted automatically. 
+Optionally the name attributes of any additional input elements and the values +to populate them with. Note that these are submitted without +checking whether matching input elements exist in the HTML form. .SS \fB[output]\fP .TP \fBdebug=\fP\fISTRING\fP[\fB,\fP\fISTRING\fP...] diff --git a/doc/web/media/man5/linkcheckerrc.5.html b/doc/web/media/man5/linkcheckerrc.5.html index 7debade5..72de8e1e 100644 --- a/doc/web/media/man5/linkcheckerrc.5.html +++ b/doc/web/media/man5/linkcheckerrc.5.html @@ -146,30 +146,38 @@ The default file location is ~/.linkchecker/linkcheckerrc on Unix,

entry=REGEX USER [PASS] (MULTILINE)
-
Provide different user/password pairs for different link types. Entries - are a triple (URL regex, username, password) or a tuple (URL regex, - username), where the entries are separated by whitespace. +
Provide individual username/password pairs for different links. In addition + to a single login page specified with loginurl multiple FTP, HTTP + (Basic Authentication) and telnet links are supported. Entries are a + triple (URL regex, username, password) or a tuple (URL regex, username), + where the entries are separated by whitespace.
The password is optional and if missing it has to be entered at the commandline.
- If the regular expression matches the checked URL, the given user/password - pair is used for authentication. The commandline options -u and - -p match every link and therefore override the entries given here. - The first match wins. At the moment, authentication is used/needed for - http[s] and ftp links. + If the regular expression matches the checked URL, the given + username/password pair is used for authentication. The command line + options -u and -p match every link and therefore override + the entries given here. The first match wins.
Command line option: -u, -p
loginurl=URL
-
A login URL to be visited before checking. Also needs authentication data - set for it.
+
The URL of a login page to be visited before link checking. The page is + expected to contain an HTML form to collect credentials and submit them to + the address in its action attribute using an HTTP POST request. The name + attributes of the input elements of the form and the values to be + submitted need to be available (see entry for an explanation of + username and password values).
loginuserfield=STRING
-
The name of the user CGI field. Default name is login.
+
The name attribute of the username input element. Default: + login.
loginpasswordfield=STRING
-
The name of the password CGI field. Default name is password.
+
The name attribute of the password input element. Default: + password.
loginextrafields=NAME:VALUE (MULTILINE)
-
Optionally any additional CGI name/value pairs. Note that the default - values are submitted automatically.
+
Optionally the name attributes of any additional input elements and the + values to populate them with. Note that these are submitted without + checking whether matching input elements exist in the HTML form.
@@ -639,7 +647,7 @@ Copyright © 2000-2014 Bastian Kleineidam - +
2020-04-242020-06-05 LinkChecker
diff --git a/linkcheck/configuration/__init__.py b/linkcheck/configuration/__init__.py index e4ffd424..b9fffc22 100644 --- a/linkcheck/configuration/__init__.py +++ b/linkcheck/configuration/__init__.py @@ -302,12 +302,6 @@ class Configuration(dict): """Make login configuration consistent.""" url = self["loginurl"] disable = False - if not self["loginpasswordfield"]: - log.warn(LOG_CHECK, _("no CGI password fieldname given for login URL.")) - disable = True - if not self["loginuserfield"]: - log.warn(LOG_CHECK, _("no CGI user fieldname given for login URL.")) - disable = True if self.get_user_password(url) == (None, None): log.warn( LOG_CHECK, diff --git a/linkcheck/director/__init__.py b/linkcheck/director/__init__.py index e92e3cc1..342d7652 100644 --- a/linkcheck/director/__init__.py +++ b/linkcheck/director/__init__.py @@ -19,7 +19,7 @@ Management of checking a queue of links with several threads. import os import time -from .. import log, LOG_CHECK, LinkCheckerInterrupt, plugins +from .. import log, LOG_CHECK, LinkCheckerError, LinkCheckerInterrupt, plugins from ..cache import urlqueue, robots_txt, results from . import aggregator, console @@ -31,6 +31,9 @@ def check_urls(aggregate): """ try: aggregate.visit_loginurl() + except LinkCheckerError as msg: + log.warn(LOG_CHECK, _("Problem using login URL: %(msg)s.") % dict(msg=msg)) + return except Exception as msg: log.warn(LOG_CHECK, _("Error using login URL: %(msg)s.") % dict(msg=msg)) raise diff --git a/linkcheck/htmlutil/loginformsearch.py b/linkcheck/htmlutil/loginformsearch.py index b4db4b67..77103414 100644 --- a/linkcheck/htmlutil/loginformsearch.py +++ b/linkcheck/htmlutil/loginformsearch.py @@ -38,8 +38,9 @@ class Form: def search_form(content, cgiuser, cgipassword): - """Search for a HTML form in the given HTML content that has the given - CGI fields. If no form is found return None. 
+ """Search for a HTML form in the given HTML content that has input elements + with name attributes that match cgiuser and/or cgipassword. If no such form + is found return None. """ soup = htmlsoup.make_soup(content) cginames = {cgiuser, cgipassword} - {None}