diff --git a/linkcheck/checker/const.py b/linkcheck/checker/const.py index 504ba9ce..5ba984f7 100644 --- a/linkcheck/checker/const.py +++ b/linkcheck/checker/const.py @@ -84,6 +84,7 @@ WARN_URL_ERROR_GETTING_CONTENT = "url-error-getting-content" WARN_URL_CONTENT_SIZE_TOO_LARGE = "url-content-too-large" WARN_URL_CONTENT_SIZE_ZERO = "url-content-size-zero" WARN_URL_OBFUSCATED_IP = "url-obfuscated-ip" +WARN_URL_RATE_LIMITED = "url-rate-limited" WARN_URL_TOO_LONG = "url-too-long" WARN_URL_WHITESPACE = "url-whitespace" WARN_FILE_MISSING_SLASH = "file-missing-slash" @@ -105,6 +106,7 @@ Warnings = { _("Could not get the content of the URL."), WARN_URL_CONTENT_SIZE_TOO_LARGE: _("The URL content size is too large."), WARN_URL_CONTENT_SIZE_ZERO: _("The URL content size is zero."), + WARN_URL_RATE_LIMITED: _("The URL request was rate limited so need reduce number of requests."), WARN_URL_TOO_LONG: _("The URL is longer than the recommended size."), WARN_URL_WHITESPACE: _("The URL contains leading or trailing whitespace."), WARN_FILE_MISSING_SLASH: _("The file: URL is missing a trailing slash."), diff --git a/linkcheck/checker/httpurl.py b/linkcheck/checker/httpurl.py index 498e279f..962e97a3 100644 --- a/linkcheck/checker/httpurl.py +++ b/linkcheck/checker/httpurl.py @@ -34,7 +34,7 @@ from . 
import (internpaturl, proxysupport) from ..HtmlParser import htmlsax from ..htmlutil import linkparse # import warnings -from .const import WARN_HTTP_EMPTY_CONTENT +from .const import (WARN_HTTP_EMPTY_CONTENT, WARN_URL_RATE_LIMITED) from requests.sessions import REDIRECT_STATI # assumed HTTP header encoding @@ -295,7 +295,7 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport): def check_response (self): """Check final result and log it.""" - if self.url_connection.status_code >= 400: + if self.url_connection.status_code >= 400 and self.url_connection.status_code != 429: self.set_result(u"%d %s" % (self.url_connection.status_code, self.url_connection.reason), valid=False) else: @@ -303,6 +303,11 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport): # no content self.add_warning(self.url_connection.reason, tag=WARN_HTTP_EMPTY_CONTENT) + + if self.url_connection.status_code == 429: + self.add_warning(u"Rate limited (Retry-After: %s)" % self.getheader("Retry-After"), + tag=WARN_URL_RATE_LIMITED) + if self.url_connection.status_code >= 200: self.set_result(u"%r %s" % (self.url_connection.status_code, self.url_connection.reason)) else: