From beaf9399f88c1d2e6c13f8ff14f54a304eda3c91 Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Mon, 28 Aug 2023 19:22:24 +0100 Subject: [PATCH 1/3] Elevate redirection to a warning tagged http-redirected Include the HTTP status code and reason in the message. --- doc/changelog.txt | 1 + doc/src/man/linkcheckerrc.rst | 2 ++ doc/upgrading.txt | 3 +++ linkcheck/checker/const.py | 2 ++ linkcheck/checker/httpurl.py | 8 ++++++-- tests/checker/data/redir.html.result | 2 +- tests/checker/test_http_redirect.py | 4 ++-- 7 files changed, 17 insertions(+), 5 deletions(-) diff --git a/doc/changelog.txt b/doc/changelog.txt index 6a9c3466..8eb4ec63 100644 --- a/doc/changelog.txt +++ b/doc/changelog.txt @@ -2,6 +2,7 @@ Changes: - Minimum Python version required is 3.8 +- HTTP redirect causes a warning, http-redirected 10.2.1 (released 05.12.2022) diff --git a/doc/src/man/linkcheckerrc.rst b/doc/src/man/linkcheckerrc.rst index 63c03e12..6b2986d2 100644 --- a/doc/src/man/linkcheckerrc.rst +++ b/doc/src/man/linkcheckerrc.rst @@ -584,6 +584,8 @@ file entry: The URL had no content. **http-rate-limited** Too many HTTP requests. +**http-redirected** + Redirected to a different URL. **mail-no-mx-host** The mail MX host could not be found. **nntp-no-newsgroup** diff --git a/doc/upgrading.txt b/doc/upgrading.txt index 49d82630..41fb3bc7 100644 --- a/doc/upgrading.txt +++ b/doc/upgrading.txt @@ -5,6 +5,9 @@ Migrating from 10.2 to 10.x --------------------------- Python 3.8 or newer is required. +An HTTP redirect now causes a warning. Set ignorewarnings=http-redirected in +linkcheckerrc for the previous behaviour. + Migrating from 10.1 to 10.2 --------------------------- Python 3.7 or newer is required. diff --git a/linkcheck/checker/const.py b/linkcheck/checker/const.py index ee55d04e..e18dea45 100644 --- a/linkcheck/checker/const.py +++ b/linkcheck/checker/const.py @@ -93,6 +93,7 @@ WARN_FTP_MISSING_SLASH = "ftp-missing-slash" WARN_HTTP_EMPTY_CONTENT = "http-empty-content" WARN_HTTP_COOKIE_STORE_ERROR = "http-cookie-store-error" WARN_HTTP_RATE_LIMITED = "http-rate-limited" +WARN_HTTP_REDIRECTED = "http-redirected" WARN_MAIL_NO_MX_HOST = "mail-no-mx-host" WARN_NNTP_NO_SERVER = "nntp-no-server" WARN_NNTP_NO_NEWSGROUP = "nntp-no-newsgroup" @@ -117,6 +118,7 @@ Warnings = { WARN_HTTP_EMPTY_CONTENT: _("The URL had no content."), WARN_HTTP_COOKIE_STORE_ERROR: _("An error occurred while storing a cookie."), WARN_HTTP_RATE_LIMITED: _("The URL request was rate limited."), + WARN_HTTP_REDIRECTED: _("Redirected to a different URL."), WARN_MAIL_NO_MX_HOST: _("The mail MX host could not be found."), WARN_NNTP_NO_SERVER: _("No NNTP server was found."), WARN_NNTP_NO_NEWSGROUP: _("The NNTP newsgroup could not be found."), diff --git a/linkcheck/checker/httpurl.py b/linkcheck/checker/httpurl.py index eb55b719..6498bd09 100644 --- a/linkcheck/checker/httpurl.py +++ b/linkcheck/checker/httpurl.py @@ -44,7 +44,7 @@ from .. import ( from . import internpaturl # import warnings -from .const import WARN_HTTP_EMPTY_CONTENT, WARN_HTTP_RATE_LIMITED +from .const import WARN_HTTP_EMPTY_CONTENT, WARN_HTTP_RATE_LIMITED, WARN_HTTP_REDIRECTED from requests.sessions import REDIRECT_STATI HTTP_SCHEMAS = ('http://', 'https://') @@ -279,7 +279,11 @@ class HttpUrl(internpaturl.InternPatternUrl): log.debug(LOG_CHECK, "Redirected to %r", newurl) self.aliases.append(newurl) # XXX on redirect errors this is not printed - self.add_info(_("Redirected to `%(url)s'.") % {'url': newurl}) + self.add_warning( + _("Redirected to `%(url)s' status: %(code)d %(reason)s.") + % {'url': newurl, 'code': self.url_connection.status_code, + 'reason': self.url_connection.reason}, + tag=WARN_HTTP_REDIRECTED) # Reset extern and recalculate self.extern = None self.set_extern(newurl) diff --git a/tests/checker/data/redir.html.result b/tests/checker/data/redir.html.result index 02a5a93c..2961651b 100644 --- a/tests/checker/data/redir.html.result +++ b/tests/checker/data/redir.html.result @@ -7,5 +7,5 @@ url redirect_newhost.html cache key http://localhost:%(port)d/%(datadir)s/redirect_newhost.html real url http://www.example.com/ name redirect to new host -info Redirected to `http://www.example.com/'. +warning Redirected to `http://www.example.com/' status: 302 Found. valid diff --git a/tests/checker/test_http_redirect.py b/tests/checker/test_http_redirect.py index 1d18dc39..672309f7 100644 --- a/tests/checker/test_http_redirect.py +++ b/tests/checker/test_http_redirect.py @@ -43,7 +43,7 @@ class TestHttpRedirect(HttpServerTest): "url %s" % url, "cache key %s" % nurl, "real url %s" % rurl, - "info Redirected to `%s'." % rurl, + "warning Redirected to `%s' status: 302 Found." % rurl, "error", ] self.direct(url, resultlines, recursionlevel=0) @@ -56,7 +56,7 @@ class TestHttpRedirect(HttpServerTest): "url %s" % url, "cache key %s" % nurl, "real url %s" % rurl, - "info Redirected to `%s'." % rurl, + "warning Redirected to `%s' status: 302 Found." % rurl, "valid", ] self.direct(url, resultlines, recursionlevel=99) From 77da1c1b019167082da2a916b428b696f644d3bc Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Mon, 28 Aug 2023 19:22:24 +0100 Subject: [PATCH 2/3] Revert "Move ignorewarnings to output section in linkcheckerrc(5)" The section header names are used when parsing linkcheckerrc. This reverts commit f0b6ddda27588c7dc781960f0ff5a3578495f65a. --- doc/src/man/linkcheckerrc.rst | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/doc/src/man/linkcheckerrc.rst b/doc/src/man/linkcheckerrc.rst index 6b2986d2..0493c2eb 100644 --- a/doc/src/man/linkcheckerrc.rst +++ b/doc/src/man/linkcheckerrc.rst @@ -114,6 +114,10 @@ filtering **ignore=**\ *REGEX* (`MULTILINE`_) Only check syntax of URLs matching the given regular expressions. Command line option: :option:`--ignore-url` +**ignorewarnings=**\ *NAME*\ [**,**\ *NAME*...] + Ignore the comma-separated list of warnings. See `WARNINGS`_ for + the list of supported warnings. + Command line option: none **internlinks=**\ *REGEX* Regular expression to add more URLs recognized as internal links. Default is that URLs given on the command line are internal. @@ -188,10 +192,6 @@ URL checking results **warnings=**\ [**0**\ \|\ **1**] If set log warnings. Default is to log warnings. Command line option: :option:`--no-warnings` -**ignorewarnings=**\ *NAME*\ [**,**\ *NAME*...] - Ignore the comma-separated list of warnings. See `WARNINGS`_ for - the list of supported warnings. - Command line option: none **ignoreerrors=**\ *URL_REGEX* [*MESSAGE_REGEX*] (`MULTILINE`_) Specify regular expressions to ignore errors for matching URLs, one per line. A second regular expression can be specified per line to From 4d9749c5baa94c160a00e7903244a9cc49a4f21b Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Mon, 28 Aug 2023 19:22:24 +0100 Subject: [PATCH 3/3] Log ignored warning messages as info --- doc/changelog.txt | 1 + doc/src/man/linkcheckerrc.rst | 2 +- linkcheck/checker/urlbase.py | 10 +++++----- 3 files changed, 7 insertions(+), 6 deletions(-) diff --git a/doc/changelog.txt b/doc/changelog.txt index 8eb4ec63..72ff9cf8 100644 --- a/doc/changelog.txt +++ b/doc/changelog.txt @@ -3,6 +3,7 @@ Changes: - Minimum Python version required is 3.8 - HTTP redirect causes a warning, http-redirected +- Ignored warning messages are logged as information 10.2.1 (released 05.12.2022) diff --git a/doc/src/man/linkcheckerrc.rst b/doc/src/man/linkcheckerrc.rst index 0493c2eb..b172c7b2 100644 --- a/doc/src/man/linkcheckerrc.rst +++ b/doc/src/man/linkcheckerrc.rst @@ -116,7 +116,7 @@ filtering Command line option: :option:`--ignore-url` **ignorewarnings=**\ *NAME*\ [**,**\ *NAME*...] Ignore the comma-separated list of warnings. See `WARNINGS`_ for - the list of supported warnings. + the list of supported warnings. Messages are logged as information. Command line option: none **internlinks=**\ *REGEX* Regular expression to add more URLs recognized as internal links. diff --git a/linkcheck/checker/urlbase.py b/linkcheck/checker/urlbase.py index 58212890..7134ded9 100644 --- a/linkcheck/checker/urlbase.py +++ b/linkcheck/checker/urlbase.py @@ -384,11 +384,11 @@ class UrlBase: Add a warning string. """ item = (tag, s) - if ( - item not in self.warnings - and tag not in self.aggregate.config["ignorewarnings"] - ): - self.warnings.append(item) + if item not in self.warnings: + if tag in self.aggregate.config["ignorewarnings"]: + self.add_info(s) + else: + self.warnings.append(item) def add_info(self, s): """