mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-16 22:10:26 +00:00
Add warning url-content-type-unparseable
This commit is contained in:
parent
fb8df6f089
commit
d6936ceb91
12 changed files with 34 additions and 21 deletions
|
|
@ -569,6 +569,8 @@ file entry:
|
|||
The URL content size is zero.
|
||||
**url-content-too-large**
|
||||
The URL content size is too large.
|
||||
**url-content-type-unparseable**
|
||||
The URL content type is not parseable.
|
||||
**url-effective-url**
|
||||
The effective URL is different from the original.
|
||||
**url-error-getting-content**
|
||||
|
|
|
|||
|
|
@ -85,6 +85,7 @@ WARN_URL_EFFECTIVE_URL = "url-effective-url"
|
|||
WARN_URL_ERROR_GETTING_CONTENT = "url-error-getting-content"
|
||||
WARN_URL_CONTENT_SIZE_TOO_LARGE = "url-content-too-large"
|
||||
WARN_URL_CONTENT_SIZE_ZERO = "url-content-size-zero"
|
||||
WARN_URL_CONTENT_TYPE_UNPARSEABLE = "url-content-type-unparseable"
|
||||
WARN_URL_OBFUSCATED_IP = "url-obfuscated-ip"
|
||||
WARN_URL_RATE_LIMITED = "url-rate-limited"
|
||||
WARN_URL_TOO_LONG = "url-too-long"
|
||||
|
|
@ -106,6 +107,7 @@ Warnings = {
|
|||
WARN_URL_ERROR_GETTING_CONTENT: _("Could not get the content of the URL."),
|
||||
WARN_URL_CONTENT_SIZE_TOO_LARGE: _("The URL content size is too large."),
|
||||
WARN_URL_CONTENT_SIZE_ZERO: _("The URL content size is zero."),
|
||||
WARN_URL_CONTENT_TYPE_UNPARSEABLE: _("The URL content type is not parseable."),
|
||||
WARN_URL_RATE_LIMITED: _(
|
||||
"The URL request was rate limited so need reduce number of requests."
|
||||
),
|
||||
|
|
|
|||
|
|
@ -272,12 +272,7 @@ class FileUrl(urlbase.UrlBase):
|
|||
return True
|
||||
if firefox.has_sqlite and firefox.extension.search(self.url):
|
||||
return True
|
||||
if self.content_type in self.ContentMimetypes:
|
||||
return True
|
||||
log.debug(
|
||||
LOG_CHECK, "File with content type %r is not parseable.", self.content_type
|
||||
)
|
||||
return False
|
||||
return self.is_content_type_parseable()
|
||||
|
||||
def set_content_type(self):
|
||||
"""Return URL content type, or an empty string if content
|
||||
|
|
|
|||
|
|
@ -154,12 +154,7 @@ class FtpUrl(internpaturl.InternPatternUrl):
|
|||
"""See if URL target is parseable for recursion."""
|
||||
if self.is_directory():
|
||||
return True
|
||||
if self.content_type in self.ContentMimetypes:
|
||||
return True
|
||||
log.debug(
|
||||
LOG_CHECK, "URL with content type %r is not parseable.", self.content_type
|
||||
)
|
||||
return False
|
||||
return self.is_content_type_parseable()
|
||||
|
||||
def is_directory(self):
|
||||
"""See if URL target is a directory."""
|
||||
|
|
|
|||
|
|
@ -375,14 +375,7 @@ class HttpUrl(internpaturl.InternPatternUrl):
|
|||
if rtype is not None:
|
||||
# XXX side effect
|
||||
self.content_type = rtype
|
||||
if self.content_type not in self.ContentMimetypes:
|
||||
log.debug(
|
||||
LOG_CHECK,
|
||||
"URL with content type %r is not parseable",
|
||||
self.content_type,
|
||||
)
|
||||
return False
|
||||
return True
|
||||
return self.is_content_type_parseable()
|
||||
|
||||
def get_robots_txt_url(self):
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -44,6 +44,7 @@ from .const import (
|
|||
WARN_URL_OBFUSCATED_IP,
|
||||
WARN_URL_CONTENT_SIZE_ZERO,
|
||||
WARN_URL_CONTENT_SIZE_TOO_LARGE,
|
||||
WARN_URL_CONTENT_TYPE_UNPARSEABLE,
|
||||
WARN_URL_WHITESPACE,
|
||||
URL_MAX_LENGTH,
|
||||
WARN_URL_TOO_LONG,
|
||||
|
|
@ -306,6 +307,24 @@ class UrlBase:
|
|||
self.title = title
|
||||
return self.title
|
||||
|
||||
def is_content_type_parseable(self):
    """
    Tell whether the content type of this URL is one that can be parsed.

    Returns True when self.content_type is a member of
    self.ContentMimetypes, and False otherwise. On the False path a debug
    message is logged and, only when self.recursion_level is 0, a warning
    tagged WARN_URL_CONTENT_TYPE_UNPARSEABLE is added to this URL.
    """
    mimetype = self.content_type
    parseable = mimetype in self.ContentMimetypes
    if not parseable:
        log.debug(LOG_CHECK, "URL with content type %r is not parseable", mimetype)
        # Only URLs at recursion level 0 get the visible warning; deeper
        # levels are reported through the debug log alone.
        if self.recursion_level == 0:
            message = _("The URL with content type %r is not parseable.") % mimetype
            self.add_warning(message, tag=WARN_URL_CONTENT_TYPE_UNPARSEABLE)
    return parseable
|
||||
|
||||
def is_parseable(self):
|
||||
"""
|
||||
Return True iff content of this url is parseable.
|
||||
|
|
|
|||
|
|
@ -2,4 +2,5 @@ url file://%(curdir)s/%(datadir)s/file.asc
|
|||
cache key file://%(curdir)s/%(datadir)s/file.asc
|
||||
real url file://%(curdir)s/%(datadir)s/file.asc
|
||||
name %(datadir)s/file.asc
|
||||
warning The URL with content type 'application/octet-stream' is not parseable.
|
||||
valid
|
||||
|
|
|
|||
|
|
@ -2,6 +2,7 @@ url file://%(curdir)s/%(datadir)s/file.markdown
|
|||
cache key file://%(curdir)s/%(datadir)s/file.markdown
|
||||
real url file://%(curdir)s/%(datadir)s/file.markdown
|
||||
name %(datadir)s/file.markdown
|
||||
warning The URL with content type 'application/octet-stream' is not parseable.
|
||||
valid
|
||||
|
||||
url http://url.example.com
|
||||
|
|
|
|||
|
|
@ -2,4 +2,5 @@ url file://%(curdir)s/%(datadir)s/file.txt
|
|||
cache key file://%(curdir)s/%(datadir)s/file.txt
|
||||
real url file://%(curdir)s/%(datadir)s/file.txt
|
||||
name %(datadir)s/file.txt
|
||||
warning The URL with content type 'text/plain' is not parseable.
|
||||
valid
|
||||
|
|
|
|||
|
|
@ -2,5 +2,5 @@ url file://%(curdir)s/%(datadir)s/Мошкова.bin
|
|||
cache key file://%(curdir)s/%(datadir)s/%%D0%%9C%%D0%%BE%%D1%%88%%D0%%BA%%D0%%BE%%D0%%B2%%D0%%B0.bin
|
||||
real url file://%(curdir)s/%(datadir)s/%%D0%%9C%%D0%%BE%%D1%%88%%D0%%BA%%D0%%BE%%D0%%B2%%D0%%B0.bin
|
||||
name %(datadir)s/Мошкова.bin
|
||||
warning The URL with content type 'application/octet-stream' is not parseable.
|
||||
valid
|
||||
|
||||
|
|
@ -32,6 +32,7 @@ class TestHttpRobots(HttpServerTest):
|
|||
"url %s" % url,
|
||||
"cache key %s" % url,
|
||||
"real url %s" % url,
|
||||
"warning The URL with content type 'text/plain' is not parseable.",
|
||||
"valid",
|
||||
]
|
||||
self.direct(url, resultlines, recursionlevel=5)
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ class TestHttpbin(LinkCheckTest):
|
|||
"url %s" % url,
|
||||
"cache key %s" % nurl,
|
||||
"real url %s" % nurl,
|
||||
"warning The URL with content type 'application/json' is not parseable.",
|
||||
"valid",
|
||||
"url %s" % linkurl,
|
||||
"cache key %s" % nlinkurl,
|
||||
|
|
@ -75,6 +76,7 @@ class TestHttpbin(LinkCheckTest):
|
|||
"url %s" % url,
|
||||
"cache key %s" % nurl,
|
||||
"real url %s" % nurl,
|
||||
"warning The URL with content type 'application/json' is not parseable.",
|
||||
"valid",
|
||||
"url %s" % linkurl,
|
||||
"cache key %s" % nlinkurl,
|
||||
|
|
@ -94,6 +96,7 @@ class TestHttpbin(LinkCheckTest):
|
|||
"url %s" % url,
|
||||
"cache key %s" % nurl,
|
||||
"real url %s" % nurl,
|
||||
"warning The URL with content type 'application/json' is not parseable.",
|
||||
"valid",
|
||||
"url %s" % linkurl,
|
||||
"cache key %s" % nlinkurl,
|
||||
|
|
|
|||
Loading…
Reference in a new issue