mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-07 00:00:58 +00:00
Merge pull request #441 from cjmayo/authentication
Improve documentation of authentication
This commit is contained in:
commit
da22d4886b
6 changed files with 57 additions and 44 deletions
|
|
@ -212,15 +212,17 @@
|
|||
# ^https?://www\.example\.com/~calvin/ calvin mypass
|
||||
# ^ftp://www\.example\.com/secret/ calvin
|
||||
|
||||
# if the website requires a login the URL and optionally the user and
|
||||
# password CGI fieldnames can be provided.
|
||||
# if the website requires a login via a page with an HTML form the URL of the
|
||||
# page and optionally the username and password input element name attributes
|
||||
# can be provided.
|
||||
#loginurl=http://www.example.com/
|
||||
|
||||
# The name of the user and password CGI field
|
||||
# The name attributes of the username and password HTML input elements
|
||||
#loginuserfield=login
|
||||
#loginpasswordfield=password
|
||||
# Optionally any additional CGI name/value pairs. Note that the default
|
||||
# values are submitted automatically.
|
||||
# Optionally the name attributes of any additional input elements and the values
|
||||
# to populate them with. Note that these are submitted without checking
|
||||
# whether matching input elements exist in the HTML form.
|
||||
#loginextrafields=
|
||||
# name1:value1
|
||||
# name 2:value 2
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
.TH LINKCHECKERRC 5 2020-04-24 "LinkChecker" "LinkChecker User Manual"
|
||||
.TH LINKCHECKERRC 5 2020-06-05 "LinkChecker" "LinkChecker User Manual"
|
||||
.SH NAME
|
||||
linkcheckerrc - configuration file for LinkChecker
|
||||
.
|
||||
|
|
@ -129,35 +129,40 @@ Command line option: \fB\-\-checkextern\fP
|
|||
.SS \fB[authentication]\fP
|
||||
.TP
|
||||
\fBentry=\fP\fIREGEX\fP \fIUSER\fP [\fIPASS\fP] (MULTILINE)
|
||||
Provide different user/password pairs for different link types.
|
||||
Entries are a triple (URL regex, username, password)
|
||||
or a tuple (URL regex, username), where the entries are
|
||||
separated by whitespace.
|
||||
Provide individual username/password pairs for different links. In addition to a
|
||||
single login page specified with \fBloginurl\fP multiple FTP,
|
||||
HTTP (Basic Authentication) and telnet links are supported. Entries are a
|
||||
triple (URL regex, username, password) or a tuple (URL regex, username),
|
||||
where the entries are separated by whitespace.
|
||||
.br
|
||||
The password is optional and if missing it has to be entered at the
|
||||
command line.
|
||||
.br
|
||||
If the regular expression matches the checked URL, the given user/password
|
||||
pair is used for authentication. The commandline options
|
||||
If the regular expression matches the checked URL, the given username/password
|
||||
pair is used for authentication. The command line options
|
||||
\fB\-u\fP and \fB\-p\fP match every link and therefore override the entries
|
||||
given here. The first match wins. At the moment, authentication is
|
||||
used/needed for http[s] and ftp links.
|
||||
given here. The first match wins.
|
||||
.br
|
||||
Command line option: \fB\-u\fP, \fB\-p\fP
|
||||
.TP
|
||||
\fBloginurl=\fP\fIURL\fP
|
||||
A login URL to be visited before checking. Also needs authentication
|
||||
data set for it.
|
||||
The URL of a login page to be visited before link checking. The page is expected
|
||||
to contain an HTML form to collect credentials and submit them to the address in
|
||||
its action attribute using an HTTP POST request.
|
||||
The name attributes of the input elements of the form and the values to be
|
||||
submitted need to be available (see \fBentry\fP for an explanation of username
|
||||
and password values).
|
||||
.TP
|
||||
\fBloginuserfield=\fP\fISTRING\fP
|
||||
The name of the user CGI field. Default name is \fBlogin\fP.
|
||||
The name attribute of the username input element. Default: \fBlogin\fP.
|
||||
.TP
|
||||
\fBloginpasswordfield=\fP\fISTRING\fP
|
||||
The name of the password CGI field. Default name is \fBpassword\fP.
|
||||
The name attribute of the password input element. Default: \fBpassword\fP.
|
||||
.TP
|
||||
\fBloginextrafields=\fP\fINAME\fP\fB:\fP\fIVALUE\fP (MULTILINE)
|
||||
Optionally any additional CGI name/value pairs. Note that the default
|
||||
values are submitted automatically.
|
||||
Optionally the name attributes of any additional input elements and the values
|
||||
to populate them with. Note that these are submitted without
|
||||
checking whether matching input elements exist in the HTML form.
|
||||
.SS \fB[output]\fP
|
||||
.TP
|
||||
\fBdebug=\fP\fISTRING\fP[\fB,\fP\fISTRING\fP...]
|
||||
|
|
|
|||
|
|
@ -146,30 +146,38 @@ The default file location is <b>~/.linkchecker/linkcheckerrc</b> on Unix,
|
|||
<h2 class="Ss" id="_fB_authentication__fP"><a class="permalink" href="#_fB_authentication__fP"><b>[authentication]</b></a></h2>
|
||||
<dl class="Bl-tag">
|
||||
<dt><b>entry=</b><i>REGEX</i> <i>USER</i> [<i>PASS</i>] (MULTILINE)</dt>
|
||||
<dd>Provide different user/password pairs for different link types. Entries
|
||||
are a triple (URL regex, username, password) or a tuple (URL regex,
|
||||
username), where the entries are separated by whitespace.
|
||||
<dd>Provide individual username/password pairs for different links. In addition
|
||||
to a single login page specified with <b>loginurl</b> multiple FTP, HTTP
|
||||
(Basic Authentication) and telnet links are supported. Entries are a
|
||||
triple (URL regex, username, password) or a tuple (URL regex, username),
|
||||
where the entries are separated by whitespace.
|
||||
<br/>
|
||||
The password is optional and if missing it has to be entered at the
|
||||
command line.
|
||||
<br/>
|
||||
If the regular expression matches the checked URL, the given user/password
|
||||
pair is used for authentication. The commandline options <b>-u</b> and
|
||||
<b>-p</b> match every link and therefore override the entries given here.
|
||||
The first match wins. At the moment, authentication is used/needed for
|
||||
http[s] and ftp links.
|
||||
If the regular expression matches the checked URL, the given
|
||||
username/password pair is used for authentication. The command line
|
||||
options <b>-u</b> and <b>-p</b> match every link and therefore override
|
||||
the entries given here. The first match wins.
|
||||
<br/>
|
||||
Command line option: <b>-u</b>, <b>-p</b></dd>
|
||||
<dt><b>loginurl=</b><i>URL</i></dt>
|
||||
<dd>A login URL to be visited before checking. Also needs authentication data
|
||||
set for it.</dd>
|
||||
<dd>The URL of a login page to be visited before link checking. The page is
|
||||
expected to contain an HTML form to collect credentials and submit them to
|
||||
the address in its action attribute using an HTTP POST request. The name
|
||||
attributes of the input elements of the form and the values to be
|
||||
submitted need to be available (see <b>entry</b> for an explanation of
|
||||
username and password values).</dd>
|
||||
<dt><b>loginuserfield=</b><i>STRING</i></dt>
|
||||
<dd>The name of the user CGI field. Default name is <b>login</b>.</dd>
|
||||
<dd>The name attribute of the username input element. Default:
|
||||
<b>login</b>.</dd>
|
||||
<dt><b>loginpasswordfield=</b><i>STRING</i></dt>
|
||||
<dd>The name of the password CGI field. Default name is <b>password</b>.</dd>
|
||||
<dd>The name attribute of the password input element. Default:
|
||||
<b>password</b>.</dd>
|
||||
<dt><b>loginextrafields=</b><i>NAME</i><b>:</b><i>VALUE</i> (MULTILINE)</dt>
|
||||
<dd>Optionally any additional CGI name/value pairs. Note that the default
|
||||
values are submitted automatically.</dd>
|
||||
<dd>Optionally the name attributes of any additional input elements and the
|
||||
values to populate them with. Note that these are submitted without
|
||||
checking whether matching input elements exist in the HTML form.</dd>
|
||||
</dl>
|
||||
</section>
|
||||
<section class="Ss">
|
||||
|
|
@ -639,7 +647,7 @@ Copyright © 2000-2014 Bastian Kleineidam
|
|||
</div>
|
||||
<table class="foot">
|
||||
<tr>
|
||||
<td class="foot-date">2020-04-24</td>
|
||||
<td class="foot-date">2020-06-05</td>
|
||||
<td class="foot-os">LinkChecker</td>
|
||||
</tr>
|
||||
</table>
|
||||
|
|
|
|||
|
|
@ -302,12 +302,6 @@ class Configuration(dict):
|
|||
"""Make login configuration consistent."""
|
||||
url = self["loginurl"]
|
||||
disable = False
|
||||
if not self["loginpasswordfield"]:
|
||||
log.warn(LOG_CHECK, _("no CGI password fieldname given for login URL."))
|
||||
disable = True
|
||||
if not self["loginuserfield"]:
|
||||
log.warn(LOG_CHECK, _("no CGI user fieldname given for login URL."))
|
||||
disable = True
|
||||
if self.get_user_password(url) == (None, None):
|
||||
log.warn(
|
||||
LOG_CHECK,
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ Management of checking a queue of links with several threads.
|
|||
import os
|
||||
import time
|
||||
|
||||
from .. import log, LOG_CHECK, LinkCheckerInterrupt, plugins
|
||||
from .. import log, LOG_CHECK, LinkCheckerError, LinkCheckerInterrupt, plugins
|
||||
from ..cache import urlqueue, robots_txt, results
|
||||
from . import aggregator, console
|
||||
|
||||
|
|
@ -31,6 +31,9 @@ def check_urls(aggregate):
|
|||
"""
|
||||
try:
|
||||
aggregate.visit_loginurl()
|
||||
except LinkCheckerError as msg:
|
||||
log.warn(LOG_CHECK, _("Problem using login URL: %(msg)s.") % dict(msg=msg))
|
||||
return
|
||||
except Exception as msg:
|
||||
log.warn(LOG_CHECK, _("Error using login URL: %(msg)s.") % dict(msg=msg))
|
||||
raise
|
||||
|
|
|
|||
|
|
@ -38,8 +38,9 @@ class Form:
|
|||
|
||||
|
||||
def search_form(content, cgiuser, cgipassword):
|
||||
"""Search for a HTML form in the given HTML content that has the given
|
||||
CGI fields. If no form is found return None.
|
||||
"""Search for a HTML form in the given HTML content that has input elements
|
||||
with name attributes that match cgiuser and/or cgipassword. If no such form
|
||||
is found return None.
|
||||
"""
|
||||
soup = htmlsoup.make_soup(content)
|
||||
cginames = {cgiuser, cgipassword} - {None}
|
||||
|
|
|
|||
Loading…
Reference in a new issue