Merge pull request #750 from cjmayo/redirected

Elevate redirection to a warning tagged http-redirected
This commit is contained in:
Chris Mayo 2023-09-04 19:21:37 +01:00 committed by GitHub
commit 2185f6ee82
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
8 changed files with 27 additions and 14 deletions

View file

@ -2,6 +2,8 @@
Changes:
- Minimum Python version required is 3.8
- HTTP redirect causes a warning, http-redirected
- Ignored warning messages are logged as information
10.2.1 (released 05.12.2022)

View file

@ -114,6 +114,10 @@ filtering
**ignore=**\ *REGEX* (`MULTILINE`_)
Only check syntax of URLs matching the given regular expressions.
Command line option: :option:`--ignore-url`
**ignorewarnings=**\ *NAME*\ [**,**\ *NAME*...]
Ignore the comma-separated list of warnings. See `WARNINGS`_ for
the list of supported warnings. Messages of ignored warnings are logged as information.
Command line option: none
**internlinks=**\ *REGEX*
Regular expression to add more URLs recognized as internal links.
Default is that URLs given on the command line are internal.
@ -188,10 +192,6 @@ URL checking results
**warnings=**\ [**0**\ \|\ **1**]
If set, log warnings. Default is to log warnings.
Command line option: :option:`--no-warnings`
**ignorewarnings=**\ *NAME*\ [**,**\ *NAME*...]
Ignore the comma-separated list of warnings. See `WARNINGS`_ for
the list of supported warnings.
Command line option: none
**ignoreerrors=**\ *URL_REGEX* [*MESSAGE_REGEX*] (`MULTILINE`_)
Specify regular expressions to ignore errors for matching URLs, one
per line. A second regular expression can be specified per line to
@ -584,6 +584,8 @@ file entry:
The URL had no content.
**http-rate-limited**
Too many HTTP requests.
**http-redirected**
Redirected to a different URL.
**mail-no-mx-host**
The mail MX host could not be found.
**nntp-no-newsgroup**

View file

@ -5,6 +5,9 @@ Migrating from 10.2 to 10.x
---------------------------
Python 3.8 or newer is required.
An HTTP redirect now causes a warning. Set ignorewarnings=http-redirected in
linkcheckerrc for the previous behaviour.
Migrating from 10.1 to 10.2
---------------------------
Python 3.7 or newer is required.

View file

@ -93,6 +93,7 @@ WARN_FTP_MISSING_SLASH = "ftp-missing-slash"
WARN_HTTP_EMPTY_CONTENT = "http-empty-content"
WARN_HTTP_COOKIE_STORE_ERROR = "http-cookie-store-error"
WARN_HTTP_RATE_LIMITED = "http-rate-limited"
WARN_HTTP_REDIRECTED = "http-redirected"
WARN_MAIL_NO_MX_HOST = "mail-no-mx-host"
WARN_NNTP_NO_SERVER = "nntp-no-server"
WARN_NNTP_NO_NEWSGROUP = "nntp-no-newsgroup"
@ -117,6 +118,7 @@ Warnings = {
WARN_HTTP_EMPTY_CONTENT: _("The URL had no content."),
WARN_HTTP_COOKIE_STORE_ERROR: _("An error occurred while storing a cookie."),
WARN_HTTP_RATE_LIMITED: _("The URL request was rate limited."),
WARN_HTTP_REDIRECTED: _("Redirected to a different URL."),
WARN_MAIL_NO_MX_HOST: _("The mail MX host could not be found."),
WARN_NNTP_NO_SERVER: _("No NNTP server was found."),
WARN_NNTP_NO_NEWSGROUP: _("The NNTP newsgroup could not be found."),

View file

@ -44,7 +44,7 @@ from .. import (
from . import internpaturl
# import warnings
from .const import WARN_HTTP_EMPTY_CONTENT, WARN_HTTP_RATE_LIMITED
from .const import WARN_HTTP_EMPTY_CONTENT, WARN_HTTP_RATE_LIMITED, WARN_HTTP_REDIRECTED
from requests.sessions import REDIRECT_STATI
HTTP_SCHEMAS = ('http://', 'https://')
@ -279,7 +279,11 @@ class HttpUrl(internpaturl.InternPatternUrl):
log.debug(LOG_CHECK, "Redirected to %r", newurl)
self.aliases.append(newurl)
# XXX on redirect errors this is not printed
self.add_info(_("Redirected to `%(url)s'.") % {'url': newurl})
self.add_warning(
_("Redirected to `%(url)s' status: %(code)d %(reason)s.")
% {'url': newurl, 'code': self.url_connection.status_code,
'reason': self.url_connection.reason},
tag=WARN_HTTP_REDIRECTED)
# Reset extern and recalculate
self.extern = None
self.set_extern(newurl)

View file

@ -384,11 +384,11 @@ class UrlBase:
Add a warning string.
"""
item = (tag, s)
if (
item not in self.warnings
and tag not in self.aggregate.config["ignorewarnings"]
):
self.warnings.append(item)
if item not in self.warnings:
if tag in self.aggregate.config["ignorewarnings"]:
self.add_info(s)
else:
self.warnings.append(item)
def add_info(self, s):
"""

View file

@ -7,5 +7,5 @@ url redirect_newhost.html
cache key http://localhost:%(port)d/%(datadir)s/redirect_newhost.html
real url http://www.example.com/
name redirect to new host
info Redirected to `http://www.example.com/'.
warning Redirected to `http://www.example.com/' status: 302 Found.
valid

View file

@ -43,7 +43,7 @@ class TestHttpRedirect(HttpServerTest):
"url %s" % url,
"cache key %s" % nurl,
"real url %s" % rurl,
"info Redirected to `%s'." % rurl,
"warning Redirected to `%s' status: 302 Found." % rurl,
"error",
]
self.direct(url, resultlines, recursionlevel=0)
@ -56,7 +56,7 @@ class TestHttpRedirect(HttpServerTest):
"url %s" % url,
"cache key %s" % nurl,
"real url %s" % rurl,
"info Redirected to `%s'." % rurl,
"warning Redirected to `%s' status: 302 Found." % rurl,
"valid",
]
self.direct(url, resultlines, recursionlevel=99)