Merge pull request #441 from cjmayo/authentication

Improve documentation of authentication
This commit is contained in:
Chris Mayo 2020-06-23 17:35:19 +01:00 committed by GitHub
commit da22d4886b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
6 changed files with 57 additions and 44 deletions

View file

@ -212,15 +212,17 @@
# ^https?://www\.example\.com/~calvin/ calvin mypass
# ^ftp://www\.example\.com/secret/ calvin
# if the website requires a login the URL and optionally the user and
# password CGI fieldnames can be provided.
# if the website requires a login via a page with an HTML form the URL of the
# page and optionally the username and password input element name attributes
# can be provided.
#loginurl=http://www.example.com/
# The name of the user and password CGI field
# The name attributes of the username and password HTML input elements
#loginuserfield=login
#loginpasswordfield=password
# Optionally any additional CGI name/value pairs. Note that the default
# values are submitted automatically.
# Optionally the name attributes of any additional input elements and the values
# to populate them with. Note that these are submitted without checking
# whether matching input elements exist in the HTML form.
#loginextrafields=
# name1:value1
# name 2:value 2

View file

@ -1,4 +1,4 @@
.TH LINKCHECKERRC 5 2020-04-24 "LinkChecker" "LinkChecker User Manual"
.TH LINKCHECKERRC 5 2020-06-05 "LinkChecker" "LinkChecker User Manual"
.SH NAME
linkcheckerrc - configuration file for LinkChecker
.
@ -129,35 +129,40 @@ Command line option: \fB\-\-checkextern\fP
.SS \fB[authentication]\fP
.TP
\fBentry=\fP\fIREGEX\fP \fIUSER\fP [\fIPASS\fP] (MULTILINE)
Provide different user/password pairs for different link types.
Entries are a triple (URL regex, username, password)
or a tuple (URL regex, username), where the entries are
separated by whitespace.
Provide individual username/password pairs for different links. In addition to a
single login page specified with \fBloginurl\fP, multiple FTP,
HTTP (Basic Authentication) and telnet links are supported. Entries are a
triple (URL regex, username, password) or a tuple (URL regex, username),
where the entries are separated by whitespace.
.br
The password is optional and if missing it has to be entered at the
command line.
.br
If the regular expression matches the checked URL, the given user/password
pair is used for authentication. The commandline options
If the regular expression matches the checked URL, the given username/password
pair is used for authentication. The command line options
\fB\-u\fP and \fB\-p\fP match every link and therefore override the entries
given here. The first match wins. At the moment, authentication is
used/needed for http[s] and ftp links.
given here. The first match wins.
.br
Command line option: \fB\-u\fP, \fB\-p\fP
.TP
\fBloginurl=\fP\fIURL\fP
A login URL to be visited before checking. Also needs authentication
data set for it.
The URL of a login page to be visited before link checking. The page is expected
to contain an HTML form to collect credentials and submit them to the address in
its action attribute using an HTTP POST request.
The name attributes of the input elements of the form and the values to be
submitted need to be available (see \fBentry\fP for an explanation of username
and password values).
.TP
\fBloginuserfield=\fP\fISTRING\fP
The name of the user CGI field. Default name is \fBlogin\fP.
The name attribute of the username input element. Default: \fBlogin\fP.
.TP
\fBloginpasswordfield=\fP\fISTRING\fP
The name of the password CGI field. Default name is \fBpassword\fP.
The name attribute of the password input element. Default: \fBpassword\fP.
.TP
\fBloginextrafields=\fP\fINAME\fP\fB:\fP\fIVALUE\fP (MULTILINE)
Optionally any additional CGI name/value pairs. Note that the default
values are submitted automatically.
Optionally the name attributes of any additional input elements and the values
to populate them with. Note that these are submitted without
checking whether matching input elements exist in the HTML form.
.SS \fB[output]\fP
.TP
\fBdebug=\fP\fISTRING\fP[\fB,\fP\fISTRING\fP...]

View file

@ -146,30 +146,38 @@ The default file location is <b>~/.linkchecker/linkcheckerrc</b> on Unix,
<h2 class="Ss" id="_fB_authentication__fP"><a class="permalink" href="#_fB_authentication__fP"><b>[authentication]</b></a></h2>
<dl class="Bl-tag">
<dt><b>entry=</b><i>REGEX</i> <i>USER</i> [<i>PASS</i>] (MULTILINE)</dt>
<dd>Provide different user/password pairs for different link types. Entries
are a triple (URL regex, username, password) or a tuple (URL regex,
username), where the entries are separated by whitespace.
<dd>Provide individual username/password pairs for different links. In addition
to a single login page specified with <b>loginurl</b>, multiple FTP, HTTP
(Basic Authentication) and telnet links are supported. Entries are a
triple (URL regex, username, password) or a tuple (URL regex, username),
where the entries are separated by whitespace.
<br/>
The password is optional and if missing it has to be entered at the
command line.
<br/>
If the regular expression matches the checked URL, the given user/password
pair is used for authentication. The commandline options <b>-u</b> and
<b>-p</b> match every link and therefore override the entries given here.
The first match wins. At the moment, authentication is used/needed for
http[s] and ftp links.
If the regular expression matches the checked URL, the given
username/password pair is used for authentication. The command line
options <b>-u</b> and <b>-p</b> match every link and therefore override
the entries given here. The first match wins.
<br/>
Command line option: <b>-u</b>, <b>-p</b></dd>
<dt><b>loginurl=</b><i>URL</i></dt>
<dd>A login URL to be visited before checking. Also needs authentication data
set for it.</dd>
<dd>The URL of a login page to be visited before link checking. The page is
expected to contain an HTML form to collect credentials and submit them to
the address in its action attribute using an HTTP POST request. The name
attributes of the input elements of the form and the values to be
submitted need to be available (see <b>entry</b> for an explanation of
username and password values).</dd>
<dt><b>loginuserfield=</b><i>STRING</i></dt>
<dd>The name of the user CGI field. Default name is <b>login</b>.</dd>
<dd>The name attribute of the username input element. Default:
<b>login</b>.</dd>
<dt><b>loginpasswordfield=</b><i>STRING</i></dt>
<dd>The name of the password CGI field. Default name is <b>password</b>.</dd>
<dd>The name attribute of the password input element. Default:
<b>password</b>.</dd>
<dt><b>loginextrafields=</b><i>NAME</i><b>:</b><i>VALUE</i> (MULTILINE)</dt>
<dd>Optionally any additional CGI name/value pairs. Note that the default
values are submitted automatically.</dd>
<dd>Optionally the name attributes of any additional input elements and the
values to populate them with. Note that these are submitted without
checking whether matching input elements exist in the HTML form.</dd>
</dl>
</section>
<section class="Ss">
@ -639,7 +647,7 @@ Copyright &#x00A9; 2000-2014 Bastian Kleineidam
</div>
<table class="foot">
<tr>
<td class="foot-date">2020-04-24</td>
<td class="foot-date">2020-06-05</td>
<td class="foot-os">LinkChecker</td>
</tr>
</table>

View file

@ -302,12 +302,6 @@ class Configuration(dict):
"""Make login configuration consistent."""
url = self["loginurl"]
disable = False
if not self["loginpasswordfield"]:
log.warn(LOG_CHECK, _("no CGI password fieldname given for login URL."))
disable = True
if not self["loginuserfield"]:
log.warn(LOG_CHECK, _("no CGI user fieldname given for login URL."))
disable = True
if self.get_user_password(url) == (None, None):
log.warn(
LOG_CHECK,

View file

@ -19,7 +19,7 @@ Management of checking a queue of links with several threads.
import os
import time
from .. import log, LOG_CHECK, LinkCheckerInterrupt, plugins
from .. import log, LOG_CHECK, LinkCheckerError, LinkCheckerInterrupt, plugins
from ..cache import urlqueue, robots_txt, results
from . import aggregator, console
@ -31,6 +31,9 @@ def check_urls(aggregate):
"""
try:
aggregate.visit_loginurl()
except LinkCheckerError as msg:
log.warn(LOG_CHECK, _("Problem using login URL: %(msg)s.") % dict(msg=msg))
return
except Exception as msg:
log.warn(LOG_CHECK, _("Error using login URL: %(msg)s.") % dict(msg=msg))
raise

View file

@ -38,8 +38,9 @@ class Form:
def search_form(content, cgiuser, cgipassword):
"""Search for a HTML form in the given HTML content that has the given
CGI fields. If no form is found return None.
"""Search for an HTML form in the given HTML content that has input elements
with name attributes that match cgiuser and/or cgipassword. If no such form
is found return None.
"""
soup = htmlsoup.make_soup(content)
cginames = {cgiuser, cgipassword} - {None}