mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-05 07:20:58 +00:00
Merge pull request #752 from cjmayo/deprecated_modules
Remove support for nntp and telnet
This commit is contained in:
commit
0faccf2ab3
26 changed files with 44 additions and 626 deletions
|
|
@ -15,10 +15,10 @@ Features
|
|||
|
||||
- recursive and multithreaded checking and site crawling
|
||||
- output in colored or normal text, HTML, SQL, CSV, XML or a sitemap graph in different formats
|
||||
- HTTP/1.1, HTTPS, FTP, mailto:, news:, nntp:, Telnet and local file links support
|
||||
- HTTP/1.1, HTTPS, FTP, mailto: and local file links support
|
||||
- restrict link checking with regular expression filters for URLs
|
||||
- proxy support
|
||||
- username/password authorization for HTTP, FTP and Telnet
|
||||
- username/password authorization for HTTP and FTP
|
||||
- honors robots.txt exclusion protocol
|
||||
- Cookie support
|
||||
- HTML5 support
|
||||
|
|
|
|||
|
|
@ -43,21 +43,17 @@ according to the URL scheme.
|
|||
"6" [label="InternPatternUrl", shape="record", href="../code/linkcheck/linkcheck.checker.internpaturl.html", target="_blank"];
|
||||
"7" [label="ItmsServicesUrl", shape="record", href="../code/linkcheck/linkcheck.checker.itmsservicesurl.html", target="_blank"];
|
||||
"8" [label="MailtoUrl", shape="record", href="../code/linkcheck/linkcheck.checker.mailtourl.html", target="_blank"];
|
||||
"9" [label="NntpUrl", shape="record", href="../code/linkcheck/linkcheck.checker.nntpurl.html", target="_blank"];
|
||||
"10" [label="TelnetUrl", shape="record", href="../code/linkcheck/linkcheck.checker.telneturl.html", target="_blank"];
|
||||
"11" [label="UnknownUrl", shape="record", href="../code/linkcheck/linkcheck.checker.unknownurl.html", target="_blank"];
|
||||
"12" [label="UrlBase", shape="record", href="../code/linkcheck/linkcheck.checker.urlbase.html", target="_blank"];
|
||||
"1" -> "12" [arrowhead="empty", arrowtail="none"];
|
||||
"2" -> "12" [arrowhead="empty", arrowtail="none"];
|
||||
"9" [label="UnknownUrl", shape="record", href="../code/linkcheck/linkcheck.checker.unknownurl.html", target="_blank"];
|
||||
"10" [label="UrlBase", shape="record", href="../code/linkcheck/linkcheck.checker.urlbase.html", target="_blank"];
|
||||
"1" -> "10" [arrowhead="empty", arrowtail="none"];
|
||||
"2" -> "10" [arrowhead="empty", arrowtail="none"];
|
||||
"3" -> "6" [arrowhead="empty", arrowtail="none"];
|
||||
"4" -> "6" [arrowhead="empty", arrowtail="none"];
|
||||
"5" -> "11" [arrowhead="empty", arrowtail="none"];
|
||||
"6" -> "12" [arrowhead="empty", arrowtail="none"];
|
||||
"7" -> "12" [arrowhead="empty", arrowtail="none"];
|
||||
"8" -> "12" [arrowhead="empty", arrowtail="none"];
|
||||
"9" -> "12" [arrowhead="empty", arrowtail="none"];
|
||||
"10" -> "12" [arrowhead="empty", arrowtail="none"];
|
||||
"11" -> "12" [arrowhead="empty", arrowtail="none"];
|
||||
"6" -> "10" [arrowhead="empty", arrowtail="none"];
|
||||
"7" -> "10" [arrowhead="empty", arrowtail="none"];
|
||||
"8" -> "10" [arrowhead="empty", arrowtail="none"];
|
||||
"9" -> "10" [arrowhead="empty", arrowtail="none"];
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -48,11 +48,10 @@ Features
|
|||
- recursive and multithreaded checking and site crawling
|
||||
- output in colored or normal text, HTML, SQL, CSV, XML or a sitemap
|
||||
graph in different formats
|
||||
- HTTP/1.1, HTTPS, FTP, mailto:, news:, nntp:, Telnet and local file
|
||||
links support
|
||||
- HTTP/1.1, HTTPS, FTP, mailto: and local file links support
|
||||
- restriction of link checking with regular expression filters for URLs
|
||||
- proxy support
|
||||
- username/password authorization for HTTP and FTP and Telnet
|
||||
- username/password authorization for HTTP and FTP
|
||||
- honors robots.txt exclusion protocol
|
||||
- Cookie support
|
||||
- HTML5 support
|
||||
|
|
|
|||
|
|
@ -16,11 +16,10 @@ LinkChecker features
|
|||
- recursive and multithreaded checking
|
||||
- output in colored or normal text, HTML, SQL, CSV, XML or a sitemap
|
||||
graph in different formats
|
||||
- support for HTTP/1.1, HTTPS, FTP, mailto:, news:, nntp:, Telnet and
|
||||
local file links
|
||||
- support for HTTP/1.1, HTTPS, FTP, mailto: and local file links
|
||||
- restriction of link checking with URL filters
|
||||
- proxy support
|
||||
- username/password authorization for HTTP, FTP and Telnet
|
||||
- username/password authorization for HTTP and FTP
|
||||
- support for robots.txt exclusion protocol
|
||||
- support for Cookies
|
||||
- support for HTML5
|
||||
|
|
@ -188,12 +187,6 @@ Checking options
|
|||
This option can be given multiple times.
|
||||
See section `REGULAR EXPRESSIONS`_ for more info.
|
||||
|
||||
.. option:: -N STRING, --nntp-server=STRING
|
||||
|
||||
Specify an NNTP server for news: links. Default is the
|
||||
environment variable :envvar:`NNTP_SERVER`. If no host is given, only the
|
||||
syntax of the link is checked.
|
||||
|
||||
.. option:: --no-follow-url=REGEX
|
||||
|
||||
Check but do not recurse into URLs matching the given regular
|
||||
|
|
@ -409,14 +402,6 @@ FTP links (**ftp:**)
|
|||
3. try to change to the given directory
|
||||
4. list the file with the NLST command
|
||||
|
||||
Telnet links (**telnet:**)
|
||||
We try to connect and if user/password are given, login to the given
|
||||
telnet server.
|
||||
|
||||
NNTP links (**news:**, **snews:**, **nntp:**)
|
||||
We try to connect to the given NNTP server. If a news group or
|
||||
article is specified, try to request it from the server.
|
||||
|
||||
Unsupported links (**javascript:**, etc.)
|
||||
An unsupported link will only print a warning. No further checking
|
||||
will be made.
|
||||
|
|
@ -489,16 +474,9 @@ automatically.
|
|||
|
||||
You can supply multiple user/password pairs in a configuration file.
|
||||
|
||||
When checking **news:** links the given NNTP host doesn't need to be the
|
||||
same as the host of the user browsing your pages.
|
||||
|
||||
ENVIRONMENT
|
||||
-----------
|
||||
|
||||
.. envvar:: NNTP_SERVER
|
||||
|
||||
specifies default NNTP server
|
||||
|
||||
.. envvar:: http_proxy
|
||||
|
||||
specifies default HTTP proxy server
|
||||
|
|
|
|||
|
|
@ -33,11 +33,6 @@ checking
|
|||
slash to join directories instead of a backslash. And the given
|
||||
directory must end with a slash.
|
||||
Command line option: none
|
||||
**nntpserver=**\ *STRING*
|
||||
Specify an NNTP server for **news:** links. Default is the
|
||||
environment variable :envvar:`NNTP_SERVER`. If no host is given, only the
|
||||
syntax of the link is checked.
|
||||
Command line option: :option:`--nntp-server`
|
||||
**recursionlevel=**\ *NUMBER*
|
||||
Check recursively all links up to given depth. A negative depth will
|
||||
enable infinite recursion. Default depth is infinite.
|
||||
|
|
@ -136,7 +131,7 @@ authentication
|
|||
**entry=**\ *REGEX* *USER* [*PASS*] (`MULTILINE`_)
|
||||
Provide individual username/password pairs for different links. In
|
||||
addition to a single login page specified with **loginurl** multiple
|
||||
FTP, HTTP (Basic Authentication) and telnet links are supported.
|
||||
FTP and HTTP (Basic Authentication) links are supported.
|
||||
Entries are a triple (URL regex, username, password) or a tuple (URL
|
||||
regex, username), where the entries are separated by whitespace.
|
||||
The password is optional and if missing it has to be entered at the
|
||||
|
|
@ -592,10 +587,6 @@ file entry:
|
|||
Redirected to a different URL.
|
||||
**mail-no-mx-host**
|
||||
The mail MX host could not be found.
|
||||
**nntp-no-newsgroup**
|
||||
The NNTP newsgroup could not be found.
|
||||
**nntp-no-server**
|
||||
No NNTP server was found.
|
||||
**url-content-size-zero**
|
||||
The URL content size is zero.
|
||||
**url-content-too-large**
|
||||
|
|
|
|||
|
|
@ -160,12 +160,8 @@ def get_urlclass_from(scheme, assume_local_file=False):
|
|||
klass = ftpurl.FtpUrl
|
||||
elif scheme == "file":
|
||||
klass = fileurl.FileUrl
|
||||
elif scheme == "telnet":
|
||||
klass = telneturl.TelnetUrl
|
||||
elif scheme == "mailto":
|
||||
klass = mailtourl.MailtoUrl
|
||||
elif scheme in ("nntp", "news", "snews"):
|
||||
klass = nntpurl.NntpUrl
|
||||
elif scheme == "dns":
|
||||
klass = dnsurl.DnsUrl
|
||||
elif scheme == "itms-services":
|
||||
|
|
@ -210,8 +206,6 @@ from . import ( # noqa: E402
|
|||
httpurl,
|
||||
dnsurl,
|
||||
mailtourl,
|
||||
telneturl,
|
||||
nntpurl,
|
||||
ignoreurl,
|
||||
itmsservicesurl,
|
||||
)
|
||||
|
|
|
|||
|
|
@ -17,7 +17,6 @@
|
|||
Helper constants.
|
||||
"""
|
||||
import socket
|
||||
import nntplib
|
||||
import ftplib
|
||||
import requests
|
||||
from .. import LinkCheckerError
|
||||
|
|
@ -36,8 +35,6 @@ ExcCacheList = [
|
|||
OSError, # OSError is thrown on Windows when a file is not found
|
||||
LinkCheckerError,
|
||||
DNSException,
|
||||
# nntp errors (including EOFError)
|
||||
nntplib.NNTPError,
|
||||
EOFError,
|
||||
# http errors
|
||||
requests.exceptions.RequestException,
|
||||
|
|
@ -95,8 +92,6 @@ WARN_HTTP_COOKIE_STORE_ERROR = "http-cookie-store-error"
|
|||
WARN_HTTP_RATE_LIMITED = "http-rate-limited"
|
||||
WARN_HTTP_REDIRECTED = "http-redirected"
|
||||
WARN_MAIL_NO_MX_HOST = "mail-no-mx-host"
|
||||
WARN_NNTP_NO_SERVER = "nntp-no-server"
|
||||
WARN_NNTP_NO_NEWSGROUP = "nntp-no-newsgroup"
|
||||
WARN_XML_PARSE_ERROR = "xml-parse-error"
|
||||
|
||||
# registered warnings
|
||||
|
|
@ -120,8 +115,6 @@ Warnings = {
|
|||
WARN_HTTP_RATE_LIMITED: _("The URL request was rate limited."),
|
||||
WARN_HTTP_REDIRECTED: _("Redirected to a different URL."),
|
||||
WARN_MAIL_NO_MX_HOST: _("The mail MX host could not be found."),
|
||||
WARN_NNTP_NO_SERVER: _("No NNTP server was found."),
|
||||
WARN_NNTP_NO_NEWSGROUP: _("The NNTP newsgroup could not be found."),
|
||||
WARN_URL_OBFUSCATED_IP: _("The IP is obfuscated."),
|
||||
WARN_XML_PARSE_ERROR: _("XML could not be parsed."),
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,109 +0,0 @@
|
|||
# Copyright (C) 2000-2014 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
"""
|
||||
Handle nntp: and news: links.
|
||||
"""
|
||||
|
||||
import re
|
||||
import time
|
||||
import nntplib
|
||||
import random
|
||||
|
||||
from . import urlbase
|
||||
from .. import log, LinkCheckerError, LOG_CHECK
|
||||
from .const import WARN_NNTP_NO_SERVER, WARN_NNTP_NO_NEWSGROUP
|
||||
|
||||
random.seed()
|
||||
|
||||
|
||||
class NntpUrl(urlbase.UrlBase):
|
||||
"""
|
||||
Url link with NNTP scheme.
|
||||
"""
|
||||
|
||||
def check_connection(self):
|
||||
"""
|
||||
Connect to NNTP server and try to request the URL article
|
||||
resource (if specified).
|
||||
"""
|
||||
nntpserver = self.host or self.aggregate.config["nntpserver"]
|
||||
if not nntpserver:
|
||||
self.add_warning(
|
||||
_("No NNTP server was specified, skipping this URL."),
|
||||
tag=WARN_NNTP_NO_SERVER,
|
||||
)
|
||||
return
|
||||
nntp = self._connect_nntp(nntpserver)
|
||||
group = self.urlparts[2]
|
||||
while group[:1] == '/':
|
||||
group = group[1:]
|
||||
if '@' in group:
|
||||
# request article info (resp, number, mid)
|
||||
number = nntp.stat("<" + group + ">")[1]
|
||||
self.add_info(_('Article number %(num)s found.') % {"num": number})
|
||||
else:
|
||||
# split off trailing article span
|
||||
group = group.split('/', 1)[0]
|
||||
if group:
|
||||
# request group info (resp, count, first, last, name)
|
||||
name = nntp.group(group)[4]
|
||||
self.add_info(_("News group %(name)s found.") % {"name": name})
|
||||
else:
|
||||
# group name is the empty string
|
||||
self.add_warning(
|
||||
_("No newsgroup specified in NNTP URL."), tag=WARN_NNTP_NO_NEWSGROUP
|
||||
)
|
||||
|
||||
def _connect_nntp(self, nntpserver):
|
||||
"""
|
||||
This is done only once per checking task. Also, the newly
|
||||
introduced error codes 504 and 505 (both indicating "Too busy, retry
|
||||
later") are caught.
|
||||
"""
|
||||
tries = 0
|
||||
nntp = None
|
||||
while tries < 2:
|
||||
tries += 1
|
||||
try:
|
||||
nntp = nntplib.NNTP(nntpserver, usenetrc=False)
|
||||
except nntplib.NNTPTemporaryError:
|
||||
self.wait()
|
||||
except nntplib.NNTPPermanentError as msg:
|
||||
if re.compile("^50[45]").search(str(msg)):
|
||||
self.wait()
|
||||
else:
|
||||
raise
|
||||
if nntp is None:
|
||||
raise LinkCheckerError(
|
||||
_("NNTP server too busy; tried more than %d times.") % tries
|
||||
)
|
||||
if log.is_debug(LOG_CHECK):
|
||||
nntp.set_debuglevel(1)
|
||||
self.add_info(nntp.getwelcome())
|
||||
return nntp
|
||||
|
||||
def wait(self):
|
||||
"""Wait some time before trying to connect again."""
|
||||
time.sleep(random.randrange(10, 30))
|
||||
|
||||
def can_get_content(self):
|
||||
"""
|
||||
NNTP urls have no content.
|
||||
|
||||
@return: False
|
||||
@rtype: bool
|
||||
"""
|
||||
return False
|
||||
|
|
@ -1,81 +0,0 @@
|
|||
# Copyright (C) 2000-2014 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
"""
|
||||
Handle telnet: links.
|
||||
"""
|
||||
|
||||
import telnetlib
|
||||
from . import urlbase
|
||||
from .. import log, LOG_CHECK
|
||||
|
||||
|
||||
def encode(s, encoding="iso-8859-1", errors="ignore"):
|
||||
"""Encode telnet data like username and password."""
|
||||
return s.encode(encoding, errors)
|
||||
|
||||
|
||||
class TelnetUrl(urlbase.UrlBase):
|
||||
"""
|
||||
Url link with telnet scheme.
|
||||
"""
|
||||
|
||||
def build_url(self):
|
||||
"""
|
||||
Call super.build_url(), set default telnet port and initialize
|
||||
the login credentials.
|
||||
"""
|
||||
super().build_url()
|
||||
# default port
|
||||
if self.port is None:
|
||||
self.port = 23
|
||||
# set user/pass
|
||||
self.user, self.password = self.get_user_password()
|
||||
|
||||
def local_check(self):
|
||||
"""
|
||||
Warn about empty host names. Else call super.local_check().
|
||||
"""
|
||||
if not self.host:
|
||||
self.set_result(_("Host is empty"), valid=False)
|
||||
return
|
||||
super().local_check()
|
||||
|
||||
def check_connection(self):
|
||||
"""
|
||||
Open a telnet connection and try to login. Expected login
|
||||
label is "login: ", expected password label is "Password: ".
|
||||
"""
|
||||
self.url_connection = telnetlib.Telnet(timeout=self.aggregate.config["timeout"])
|
||||
if log.is_debug(LOG_CHECK):
|
||||
self.url_connection.set_debuglevel(1)
|
||||
self.url_connection.open(self.host, self.port)
|
||||
if self.user:
|
||||
self.url_connection.read_until(b"login: ", 10)
|
||||
self.url_connection.write(encode(self.user) + b"\n")
|
||||
if self.password:
|
||||
self.url_connection.read_until(b"Password: ", 10)
|
||||
self.url_connection.write(encode(self.password) + b"\n")
|
||||
# XXX how to tell if we are logged in??
|
||||
self.url_connection.write(b"exit\n")
|
||||
|
||||
def can_get_content(self):
|
||||
"""
|
||||
Telnet URLs have no content.
|
||||
|
||||
@return: False
|
||||
@rtype: bool
|
||||
"""
|
||||
return False
|
||||
|
|
@ -62,11 +62,11 @@ ignored_schemes_permanent = r"""
|
|||
|cap # Calendar Access Protocol
|
||||
|cid # content identifier
|
||||
|coap # coap
|
||||
|coap\+tcp # coap+tcp [1]
|
||||
|coap\+ws # coap+ws [1]
|
||||
|coap\+tcp # coap+tcp (see [reviewer notes])
|
||||
|coap\+ws # coap+ws (see [reviewer notes])
|
||||
|coaps # coaps
|
||||
|coaps\+tcp # coaps+tcp [1]
|
||||
|coaps\+ws # coaps+ws [1]
|
||||
|coaps\+tcp # coaps+tcp (see [reviewer notes])
|
||||
|coaps\+ws # coaps+ws (see [reviewer notes])
|
||||
|crid # TV-Anytime Content Reference Identifier
|
||||
|data # data
|
||||
|dav # dav
|
||||
|
|
@ -97,11 +97,14 @@ ignored_schemes_permanent = r"""
|
|||
|mid # message identifier
|
||||
|msrp # Message Session Relay Protocol
|
||||
|msrps # Message Session Relay Protocol Secure
|
||||
|mt # Matter protocol on-boarding payloads that are encoded for use in QR Codes and/or NFC Tags
|
||||
|mtqp # Message Tracking Query Protocol
|
||||
|mupdate # Mailbox Update (MUPDATE) Protocol
|
||||
|news # USENET news
|
||||
|nfs # network file system protocol
|
||||
|ni # ni
|
||||
|nih # nih
|
||||
|nntp # USENET news using NNTP access
|
||||
|opaquelocktoken # opaquelocktokent
|
||||
|pkcs11 # PKCS#11
|
||||
|pop # Post Office Protocol v3
|
||||
|
|
@ -160,6 +163,7 @@ ignored_schemes_provisional = r"""
|
|||
|apt # apt
|
||||
|ar # ar
|
||||
|ark # ark
|
||||
|at # at (see [reviewer notes])
|
||||
|attachment # attachment
|
||||
|aw # aw
|
||||
|barion # barion
|
||||
|
|
@ -183,6 +187,7 @@ ignored_schemes_provisional = r"""
|
|||
|cvs # cvs
|
||||
|dab # dab
|
||||
|dat # dat
|
||||
|dhttp # dhttp (see [reviewer notes])
|
||||
|diaspora # diaspora
|
||||
|did # did
|
||||
|dis # dis
|
||||
|
|
@ -192,12 +197,12 @@ ignored_schemes_provisional = r"""
|
|||
|doi # doi
|
||||
|dpp # dpp
|
||||
|drm # drm
|
||||
|drop # drop
|
||||
|dtmi # dtmi
|
||||
|dvb # dvb
|
||||
|dvx # dvx
|
||||
|dweb # dweb
|
||||
|ed2k # ed2k
|
||||
|eid # eid
|
||||
|elsi # elsi
|
||||
|embedded # embedded
|
||||
|ens # ens
|
||||
|
|
@ -239,7 +244,9 @@ ignored_schemes_provisional = r"""
|
|||
|lastfm # lastfm
|
||||
|lbry # lbry
|
||||
|ldaps # ldaps
|
||||
|lid # lid
|
||||
|lorawan # lorawan
|
||||
|lpa # lpa
|
||||
|lvlt # lvlt
|
||||
|magnet # magnet
|
||||
|maps # maps
|
||||
|
|
@ -266,6 +273,7 @@ ignored_schemes_provisional = r"""
|
|||
|ms\-help # ms-help
|
||||
|ms\-infopath # ms-infopath
|
||||
|ms\-inputapp # ms-inputapp
|
||||
|ms\-launchremotedesktop # ms-launchremotedesktop
|
||||
|ms\-lockscreencomponent\-config # ms-lockscreencomponent-config
|
||||
|ms\-media\-stream\-id # ms-media-stream-id
|
||||
|ms\-meetnow # ms-meetnow
|
||||
|
|
@ -277,6 +285,8 @@ ignored_schemes_provisional = r"""
|
|||
|ms\-powerpoint # ms-powerpoint
|
||||
|ms\-project # ms-project
|
||||
|ms\-publisher # ms-publisher
|
||||
|ms\-remotedesktop # ms-remotedesktop
|
||||
|ms\-remotedesktop\-launch # ms-remotedesktop-launch
|
||||
|ms\-restoretabcompanion # ms-restoretabcompanion
|
||||
|ms\-screenclip # ms-screenclip
|
||||
|ms\-screensketch # ms-screensketch
|
||||
|
|
@ -317,7 +327,6 @@ ignored_schemes_provisional = r"""
|
|||
|ms\-word # ms-word
|
||||
|msnim # msnim
|
||||
|mss # mss
|
||||
|mt # Matter protocol
|
||||
|mumble # mumble
|
||||
|mvn # mvn
|
||||
|notes # notes
|
||||
|
|
@ -326,6 +335,7 @@ ignored_schemes_provisional = r"""
|
|||
|oid # oid
|
||||
|onenote # onenote
|
||||
|onenote\-cmd # onenote-cmd
|
||||
|openid # OpenID Connect
|
||||
|openpgp4fpr # openpgp4fpr
|
||||
|otpauth # otpauth
|
||||
|palm # palm
|
||||
|
|
@ -365,12 +375,14 @@ ignored_schemes_provisional = r"""
|
|||
|spotify # spotify
|
||||
|ssb # ssb
|
||||
|ssh # ssh
|
||||
|starknet # starknet
|
||||
|steam # steam
|
||||
|submit # submit
|
||||
|svn # svn
|
||||
|swh # swh
|
||||
|swid # swid
|
||||
|swidpath # swidpath
|
||||
|swid # swid (see [reviewer notes])
|
||||
|swidpath # swidpath (see [reviewer notes])
|
||||
|taler # taler
|
||||
|teamspeak # teamspeak
|
||||
|teliaeid # teliaeid
|
||||
|things # things
|
||||
|
|
@ -386,8 +398,9 @@ ignored_schemes_provisional = r"""
|
|||
|vscode # vscode
|
||||
|vscode\-insiders # vscode-insiders
|
||||
|vsls # vsls
|
||||
|w3 # w3
|
||||
|w3 # w3 (see [reviewer notes])
|
||||
|wcr # wcr
|
||||
|web\+ap # web+ap
|
||||
|web3 # web3
|
||||
|webcal # webcal
|
||||
|wifi # wifi
|
||||
|
|
@ -399,13 +412,18 @@ ignored_schemes_provisional = r"""
|
|||
"""
|
||||
|
||||
ignored_schemes_historical = r"""
|
||||
|bb # bb
|
||||
|drop # drop
|
||||
|fax # fax
|
||||
|filesystem # filesystem
|
||||
|grd # grd
|
||||
|mailserver # Access to data available from mail servers
|
||||
|modem # modem
|
||||
|p1 # p1
|
||||
|pack # pack
|
||||
|prospero # Prospero Directory Service
|
||||
|snews # NNTP over SSL/TLS
|
||||
|upt # upt
|
||||
|videotex # videotex
|
||||
|wais # Wide Area Information Servers
|
||||
|wpid # wpid
|
||||
|
|
|
|||
|
|
@ -56,7 +56,7 @@ from .const import (
|
|||
from ..url import url_fix_wayback_query
|
||||
|
||||
# schemes that are invalid with an empty hostname
|
||||
scheme_requires_host = ("ftp", "http", "telnet")
|
||||
scheme_requires_host = ("ftp", "http")
|
||||
|
||||
|
||||
def urljoin(parent, url):
|
||||
|
|
|
|||
|
|
@ -37,8 +37,6 @@ Notes = _(
|
|||
o If your platform does not support threading, LinkChecker disables it
|
||||
automatically.
|
||||
o You can supply multiple user/password pairs in a configuration file.
|
||||
o When checking 'news:' links the given NNTP host doesn't need to be the
|
||||
same as the host of the user browsing your pages.
|
||||
"""
|
||||
)
|
||||
|
||||
|
|
@ -383,17 +381,6 @@ class ArgParser(LCArgumentParser):
|
|||
"expression. This option can be given multiple times."
|
||||
),
|
||||
)
|
||||
group.add_argument(
|
||||
"-N",
|
||||
"--nntp-server",
|
||||
dest="nntpserver",
|
||||
metavar="STRING",
|
||||
help=_(
|
||||
"Specify an NNTP server for 'news:...' links. Default is the\n"
|
||||
"environment variable NNTP_SERVER. If no host is given,\n"
|
||||
"only the syntax of the link is checked."
|
||||
),
|
||||
)
|
||||
group.add_argument(
|
||||
"-p",
|
||||
"--password",
|
||||
|
|
|
|||
|
|
@ -151,8 +151,6 @@ def setup_config(config, options):
|
|||
)
|
||||
new_logger = config.logger_new(ftype, **ns)
|
||||
config["fileoutput"].append(new_logger)
|
||||
if options.nntpserver:
|
||||
config["nntpserver"] = options.nntpserver
|
||||
if options.username:
|
||||
_username = options.username
|
||||
constructauth = True
|
||||
|
|
|
|||
|
|
@ -150,7 +150,6 @@ class Configuration(dict):
|
|||
self["maxrunseconds"] = None
|
||||
self["maxrequestspersecond"] = 10
|
||||
self["maxhttpredirects"] = 10
|
||||
self["nntpserver"] = os.environ.get("NNTP_SERVER", None)
|
||||
self["sslverify"] = True
|
||||
self["threads"] = 10
|
||||
self["timeout"] = 60
|
||||
|
|
|
|||
|
|
@ -194,7 +194,6 @@ class LCConfigParser(RawConfigParser):
|
|||
self.read_int_option(section, "timeout", min=1)
|
||||
self.read_int_option(section, "aborttimeout", min=1)
|
||||
self.read_int_option(section, "recursionlevel", min=-1)
|
||||
self.read_string_option(section, "nntpserver")
|
||||
self.read_string_option(section, "useragent")
|
||||
self.read_float_option(section, "maxrequestspersecond", min=0.001)
|
||||
self.read_int_option(section, "maxnumurls", min=0)
|
||||
|
|
|
|||
|
|
@ -146,8 +146,6 @@
|
|||
#aborttimeout=300
|
||||
# The recursion level determines how many times links inside pages are followed.
|
||||
#recursionlevel=-1
|
||||
# Basic NNTP server. Overrides NNTP_SERVER environment variable.
|
||||
#nntpserver=
|
||||
# parse a cookiefile for initial cookie data, example:
|
||||
#cookiefile=/path/to/cookies.txt
|
||||
# User-Agent header string to send to HTTP web servers
|
||||
|
|
|
|||
|
|
@ -36,7 +36,6 @@ url_encoding = "utf-8"
|
|||
default_ports = {
|
||||
'http': 80,
|
||||
'https': 443,
|
||||
'nntps': 563,
|
||||
'ftp': 21,
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -90,7 +90,6 @@ version_scheme = "post-release"
|
|||
|
||||
[tool.hatch.envs.test]
|
||||
dependencies = [
|
||||
"miniboa",
|
||||
"parameterized",
|
||||
"pdfminer.six",
|
||||
"pyftpdlib",
|
||||
|
|
|
|||
|
|
@ -27,8 +27,6 @@ filter_uri_schemes_permanent = (
|
|||
"http",
|
||||
"https",
|
||||
"mailto",
|
||||
"news",
|
||||
"nntp",
|
||||
)
|
||||
|
||||
template = '''
|
||||
|
|
@ -49,7 +47,7 @@ ignored_schemes_other = r"""
|
|||
%(other)s
|
||||
"""
|
||||
|
||||
ignored_schemes = "^(%%s%%s%%s%%s)$" %% (
|
||||
ignored_schemes = "^({}{}{}{})$".format(
|
||||
ignored_schemes_permanent,
|
||||
ignored_schemes_provisional,
|
||||
ignored_schemes_historical,
|
||||
|
|
|
|||
|
|
@ -180,33 +180,6 @@ def has_pyftpdlib():
|
|||
need_pyftpdlib = _need_func(has_pyftpdlib, "pyftpdlib")
|
||||
|
||||
|
||||
@lru_cache(1)
|
||||
def has_newsserver(server):
|
||||
import nntplib
|
||||
|
||||
try:
|
||||
nntp = nntplib.NNTP(server, usenetrc=False)
|
||||
nntp.quit()
|
||||
return True
|
||||
except nntplib.NNTPError:
|
||||
return False
|
||||
|
||||
|
||||
def need_newsserver(server):
|
||||
"""Decorator skipping test if newsserver is not available."""
|
||||
|
||||
def check_func(func):
|
||||
def newfunc(*args, **kwargs):
|
||||
if not has_newsserver(server):
|
||||
pytest.skip("Newsserver `%s' is not available" % server)
|
||||
return func(*args, **kwargs)
|
||||
|
||||
newfunc.__name__ = func.__name__
|
||||
return newfunc
|
||||
|
||||
return check_func
|
||||
|
||||
|
||||
@lru_cache(1)
|
||||
def has_x11():
|
||||
"""Test if DISPLAY variable is set."""
|
||||
|
|
|
|||
|
|
@ -1,107 +0,0 @@
|
|||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
"""
|
||||
Define telnet test support classes for LinkChecker tests.
|
||||
"""
|
||||
import time
|
||||
import threading
|
||||
import telnetlib
|
||||
import miniboa
|
||||
from . import LinkCheckTest
|
||||
|
||||
|
||||
TIMEOUT = 5
|
||||
|
||||
|
||||
class TelnetServerTest(LinkCheckTest):
|
||||
"""Start/stop a Telnet server that can be used for testing."""
|
||||
|
||||
def __init__(self, methodName="runTest"):
|
||||
"""Init test class and store default ftp server port."""
|
||||
super().__init__(methodName=methodName)
|
||||
self.host = "localhost"
|
||||
self.port = None
|
||||
self.stop_event = threading.Event()
|
||||
self.server_thread = None
|
||||
|
||||
def get_url(self, user=None, password=None):
|
||||
if user is not None:
|
||||
if password is not None:
|
||||
netloc = f"{user}:{password}@{self.host}"
|
||||
else:
|
||||
netloc = f"{user}@{self.host}"
|
||||
else:
|
||||
netloc = self.host
|
||||
return "telnet://%s:%d" % (netloc, self.port)
|
||||
|
||||
def setUp(self):
|
||||
"""Start a new Telnet server in a new thread."""
|
||||
super().setUp()
|
||||
self.port, self.server_thread = start_server(self.host, 0, self.stop_event)
|
||||
self.assertFalse(self.port is None)
|
||||
|
||||
def tearDown(self):
|
||||
"""Send QUIT request to telnet server."""
|
||||
self.stop_event.set()
|
||||
if self.server_thread is not None:
|
||||
self.server_thread.join(10)
|
||||
assert not self.server_thread.is_alive()
|
||||
|
||||
|
||||
def start_server(host, port, stop_event):
|
||||
# Instantiate Telnet server class and listen to host:port
|
||||
clients = []
|
||||
|
||||
def on_connect(client):
|
||||
clients.append(client)
|
||||
client.send("Telnet test server\nlogin: ")
|
||||
|
||||
server = miniboa.TelnetServer(port=port, address=host, on_connect=on_connect)
|
||||
port = server.server_socket.getsockname()[1]
|
||||
t = threading.Thread(None, serve_forever, args=(server, clients, stop_event))
|
||||
t.start()
|
||||
# wait for server to start up
|
||||
tries = 0
|
||||
while tries < 5:
|
||||
tries += 1
|
||||
try:
|
||||
client = telnetlib.Telnet(timeout=TIMEOUT)
|
||||
client.open(host, port)
|
||||
client.write(b"exit\n")
|
||||
break
|
||||
except Exception:
|
||||
time.sleep(0.5)
|
||||
return port, t
|
||||
|
||||
|
||||
def serve_forever(server, clients, stop_event):
|
||||
"""Run poll loop for server."""
|
||||
while True:
|
||||
if stop_event.is_set():
|
||||
return
|
||||
server.poll()
|
||||
for client in clients:
|
||||
if client.active and client.cmd_ready:
|
||||
handle_cmd(client)
|
||||
|
||||
|
||||
def handle_cmd(client):
|
||||
"""Handle telnet clients."""
|
||||
msg = client.get_command().lower()
|
||||
if msg == "exit":
|
||||
client.active = False
|
||||
else:
|
||||
client.send("Password: ")
|
||||
|
|
@ -1,141 +0,0 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2010,2014 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
"""
|
||||
Test news checking.
|
||||
"""
|
||||
import pytest
|
||||
from tests import need_newsserver, limit_time
|
||||
from . import LinkCheckTest
|
||||
|
||||
# Changes often, as servers tend to get invalid. Thus it is necessary
|
||||
# to enable the has_newsserver() resource manually.
|
||||
NNTP_SERVER = "news.uni-stuttgart.de"
|
||||
# info string returned by news server
|
||||
NNTP_INFO = (
|
||||
"200 news.uni-stuttgart.de InterNetNews NNRP server INN 2.5.2 ready (no posting)"
|
||||
)
|
||||
# Most free NNTP servers are slow, so don't waste a lot of time running those.
|
||||
NNTP_TIMEOUT_SECS = 30
|
||||
|
||||
|
||||
# disabled for now until some stable news server comes up
|
||||
@pytest.mark.skip(reason="disabled for now until some stable news server comes up")
|
||||
class TestNews(LinkCheckTest):
|
||||
"""Test nntp: and news: link checking."""
|
||||
|
||||
def newstest(self, url, resultlines):
|
||||
self.direct(url, resultlines)
|
||||
|
||||
def test_news_without_host(self):
|
||||
# news testing
|
||||
url = "news:comp.os.linux.misc"
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
"cache key %s" % url,
|
||||
"real url %s" % url,
|
||||
"warning No NNTP server was specified, skipping this URL.",
|
||||
"valid",
|
||||
]
|
||||
self.newstest(url, resultlines)
|
||||
# no group
|
||||
url = "news:"
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
"cache key %s" % url,
|
||||
"real url %s" % url,
|
||||
"warning No NNTP server was specified, skipping this URL.",
|
||||
"valid",
|
||||
]
|
||||
self.newstest(url, resultlines)
|
||||
|
||||
def test_snews_with_group(self):
|
||||
url = "snews:de.comp.os.unix.linux.misc"
|
||||
nurl = self.norm(url)
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
"cache key %s" % nurl,
|
||||
"real url %s" % nurl,
|
||||
"warning No NNTP server was specified, skipping this URL.",
|
||||
"valid",
|
||||
]
|
||||
self.newstest(url, resultlines)
|
||||
|
||||
def test_illegal_syntax(self):
|
||||
# illegal syntax
|
||||
url = "news:§$%&/´`(§%"
|
||||
qurl = self.norm(url)
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
"cache key %s" % qurl,
|
||||
"real url %s" % qurl,
|
||||
"warning No NNTP server was specified, skipping this URL.",
|
||||
"valid",
|
||||
]
|
||||
self.newstest(url, resultlines)
|
||||
|
||||
@need_newsserver(NNTP_SERVER)
|
||||
@limit_time(NNTP_TIMEOUT_SECS, skip=True)
|
||||
def test_nntp_with_host(self):
|
||||
url = "nntp://%s/comp.lang.python" % NNTP_SERVER
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
"cache key %s" % url,
|
||||
"real url %s" % url,
|
||||
"info %s" % NNTP_INFO,
|
||||
"info News group comp.lang.python found.",
|
||||
"valid",
|
||||
]
|
||||
self.newstest(url, resultlines)
|
||||
|
||||
@need_newsserver(NNTP_SERVER)
|
||||
@limit_time(NNTP_TIMEOUT_SECS, skip=True)
|
||||
def test_article_span(self):
|
||||
url = "nntp://%s/comp.lang.python/1-5" % NNTP_SERVER
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
"cache key %s" % url,
|
||||
"real url %s" % url,
|
||||
"info %s" % NNTP_INFO,
|
||||
"info News group comp.lang.python found.",
|
||||
"valid",
|
||||
]
|
||||
self.newstest(url, resultlines)
|
||||
|
||||
def test_article_span_no_host(self):
|
||||
url = "news:comp.lang.python/1-5"
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
"cache key %s" % url,
|
||||
"real url %s" % url,
|
||||
"warning No NNTP server was specified, skipping this URL.",
|
||||
"valid",
|
||||
]
|
||||
self.newstest(url, resultlines)
|
||||
|
||||
@need_newsserver(NNTP_SERVER)
|
||||
@limit_time(NNTP_TIMEOUT_SECS, skip=True)
|
||||
def test_host_no_group(self):
|
||||
url = "nntp://%s/" % NNTP_SERVER
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
"cache key %s" % url,
|
||||
"real url %s" % url,
|
||||
"info %s" % NNTP_INFO,
|
||||
"warning No newsgroup specified in NNTP URL.",
|
||||
"valid",
|
||||
]
|
||||
self.newstest(url, resultlines)
|
||||
|
|
@ -1,60 +0,0 @@
|
|||
# Copyright (C) 2004-2012 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
"""
|
||||
Test telnet checking.
|
||||
"""
|
||||
from .telnetserver import TelnetServerTest
|
||||
|
||||
|
||||
class TestTelnet(TelnetServerTest):
|
||||
"""Test telnet: link checking."""
|
||||
|
||||
def test_telnet_error(self):
|
||||
url = "telnet:"
|
||||
nurl = self.norm(url)
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
"cache key None",
|
||||
"real url %s" % nurl,
|
||||
"error",
|
||||
]
|
||||
self.direct(url, resultlines)
|
||||
|
||||
def test_telnet_localhost(self):
|
||||
url = self.get_url()
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
"cache key %s" % url,
|
||||
"real url %s" % url,
|
||||
"valid",
|
||||
]
|
||||
self.direct(url, resultlines)
|
||||
url = self.get_url(user="test")
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
"cache key %s" % url,
|
||||
"real url %s" % url,
|
||||
"valid",
|
||||
]
|
||||
self.direct(url, resultlines)
|
||||
url = self.get_url(user="test", password="test")
|
||||
resultlines = [
|
||||
"url %s" % url,
|
||||
"cache key %s" % url,
|
||||
"real url %s" % url,
|
||||
"valid",
|
||||
]
|
||||
self.direct(url, resultlines)
|
||||
|
|
@ -5,7 +5,6 @@ threads=5
|
|||
timeout=42
|
||||
aborttimeout=99
|
||||
recursionlevel=1
|
||||
nntpserver=example.org
|
||||
cookiefile=blablabla
|
||||
useragent=Example/0.0
|
||||
debugmemory=1
|
||||
|
|
|
|||
|
|
@ -48,7 +48,6 @@ class TestConfig(TestBase):
|
|||
self.assertEqual(config["timeout"], 42)
|
||||
self.assertEqual(config["aborttimeout"], 99)
|
||||
self.assertEqual(config["recursionlevel"], 1)
|
||||
self.assertEqual(config["nntpserver"], "example.org")
|
||||
self.assertEqual(config["cookiefile"], "blablabla")
|
||||
self.assertEqual(config["useragent"], "Example/0.0")
|
||||
self.assertEqual(config["debugmemory"], 1)
|
||||
|
|
|
|||
1
tox.ini
1
tox.ini
|
|
@ -8,7 +8,6 @@ deps =
|
|||
pyopenssl
|
||||
pytest-xdist
|
||||
pytest-cov
|
||||
miniboa >= 1.0.8
|
||||
GeoIP
|
||||
|
||||
[testenv]
|
||||
|
|
|
|||
Loading…
Reference in a new issue