mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-30 02:54:42 +00:00
Remove the duplicate URL content check.
This commit is contained in:
parent
84dac60f57
commit
023da7c993
7 changed files with 3 additions and 28 deletions
|
|
@ -7,6 +7,7 @@ Features:
|
|||
Changes:
|
||||
- checking: Always use the W3C validator to check HTML or CSS syntax.
|
||||
- checking: Remove the http-wrong-redirect warning.
|
||||
- checking: Remove the url-content-duplicate warning.
|
||||
- cmdline: Replace argument parsing. No changes in functionality, only
|
||||
the help text will be formatted differently.
|
||||
|
||||
|
|
|
|||
|
|
@ -82,7 +82,6 @@ WARN_URL_EFFECTIVE_URL = "url-effective-url"
|
|||
WARN_URL_ERROR_GETTING_CONTENT = "url-error-getting-content"
|
||||
WARN_URL_ANCHOR_NOT_FOUND = "url-anchor-not-found"
|
||||
WARN_URL_WARNREGEX_FOUND = "url-warnregex-found"
|
||||
WARN_URL_CONTENT_DUPLICATE = "url-content-duplicate"
|
||||
WARN_URL_CONTENT_SIZE_TOO_LARGE = "url-content-too-large"
|
||||
WARN_URL_CONTENT_SIZE_ZERO = "url-content-size-zero"
|
||||
WARN_URL_CONTENT_SIZE_UNEQUAL = "url-content-size-unequal"
|
||||
|
|
@ -119,7 +118,6 @@ Warnings = {
|
|||
WARN_URL_ANCHOR_NOT_FOUND: _("URL anchor was not found."),
|
||||
WARN_URL_WARNREGEX_FOUND:
|
||||
_("The warning regular expression was found in the URL contents."),
|
||||
WARN_URL_CONTENT_DUPLICATE: _("The URL content is a duplicate of another URL."),
|
||||
WARN_URL_CONTENT_SIZE_TOO_LARGE: _("The URL content size is too large."),
|
||||
WARN_URL_CONTENT_SIZE_ZERO: _("The URL content size is zero."),
|
||||
WARN_URL_CONTENT_SIZE_UNEQUAL: _("The URL content size and download size are unequal."),
|
||||
|
|
|
|||
|
|
@ -218,7 +218,6 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport, pooledco
|
|||
def stor_data (s):
|
||||
"""Helper method storing given data"""
|
||||
urls = self.aggregate.add_download_data(self.cache_content_key, s)
|
||||
self.warn_duplicate_content(urls)
|
||||
# limit the download size
|
||||
if (buf.tell() + len(s)) > self.MaxFilesizeBytes:
|
||||
raise LinkCheckerError(_("FTP file size too large"))
|
||||
|
|
|
|||
|
|
@ -678,7 +678,6 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport, pooledc
|
|||
raise LinkCheckerError(_("File size too large"))
|
||||
dlsize = len(data)
|
||||
urls = self.aggregate.add_download_data(self.cache_content_key, data)
|
||||
self.warn_duplicate_content(urls)
|
||||
encoding = headers.get_content_encoding(self.headers)
|
||||
if encoding in SUPPORTED_ENCODINGS:
|
||||
try:
|
||||
|
|
|
|||
|
|
@ -40,8 +40,7 @@ from .const import (WARN_URL_EFFECTIVE_URL,
|
|||
WARN_URL_CONTENT_SIZE_TOO_LARGE, WARN_URL_CONTENT_SIZE_ZERO,
|
||||
WARN_URL_CONTENT_SIZE_UNEQUAL, WARN_URL_WHITESPACE,
|
||||
WARN_URL_TOO_LONG, URL_MAX_LENGTH, URL_WARN_LENGTH,
|
||||
WARN_URL_CONTENT_DUPLICATE, WARN_SYNTAX_HTML,
|
||||
WARN_SYNTAX_CSS,
|
||||
WARN_SYNTAX_HTML, WARN_SYNTAX_CSS,
|
||||
ExcList, ExcSyntaxList, ExcNoCacheList)
|
||||
|
||||
# helper alias
|
||||
|
|
@ -763,24 +762,8 @@ class UrlBase (object):
|
|||
raise LinkCheckerError(_("File size too large"))
|
||||
if not self.is_local():
|
||||
urls = self.aggregate.add_download_data(self.cache_content_key, data)
|
||||
self.warn_duplicate_content(urls)
|
||||
return data, len(data)
|
||||
|
||||
def warn_duplicate_content(self, urls):
|
||||
"""If given URL list is not empty, warn about duplicate URL content.
|
||||
@param urls: URLs with duplicate content
|
||||
@ptype urls: list of unicode
|
||||
"""
|
||||
if not urls or self.size <= 0:
|
||||
return
|
||||
if urlutil.is_duplicate_content_url(self.url, urls[0]):
|
||||
return
|
||||
args = dict(
|
||||
urls=u",".join(urls),
|
||||
size=_(" with %s") % strformat.strsize(self.size),
|
||||
)
|
||||
self.add_warning(_("Content%(size)s is the same as in URLs (%(urls)s).") % args, tag=WARN_URL_CONTENT_DUPLICATE)
|
||||
|
||||
def check_content (self):
|
||||
"""Check content data for warnings, syntax errors, viruses etc."""
|
||||
if not (self.valid and self.can_get_content()):
|
||||
|
|
|
|||
|
|
@ -30,14 +30,12 @@ url /?d=directory&p=page1
|
|||
cache key http://localhost:%(port)d/?d=directory&p=page1
|
||||
real url http://localhost:%(port)d/?d=directory&p=page1
|
||||
name should not be cached
|
||||
warning Content with 309B is the same as in URLs (http://localhost:%(port)d/?d=directory&p=page).
|
||||
valid
|
||||
|
||||
url /?q=ü
|
||||
cache key http://localhost:%(port)d/?q=%%C3%%BC
|
||||
real url http://localhost:%(port)d/?q=%%C3%%BC
|
||||
name html entities
|
||||
warning Content with 309B is the same as in URLs (http://localhost:%(port)d/?d=directory&p=page,http://localhost:%(port)d/?d=directory&p=page1).
|
||||
valid
|
||||
|
||||
url file.css
|
||||
|
|
@ -70,9 +68,7 @@ valid
|
|||
|
||||
url http://example.org/foo/ #a=1,2,3
|
||||
cache key http://example.org/foo/%%20#a%%3D1%%2C2%%2C3
|
||||
real url http://www.iana.org/domains/example#a%%3D1%%2C2%%2C3
|
||||
info Redirected to `http://www.iana.org/domains/example/'.
|
||||
info Redirected to `http://www.iana.org/domains/example'.
|
||||
real url http://example.org/foo/%%20#a%%3D1%%2C2%%2C3
|
||||
warning Anchor `a%%3D1%%2C2%%2C3' not found. Available anchors: -.
|
||||
valid
|
||||
|
||||
|
|
|
|||
|
|
@ -63,7 +63,6 @@ class TestHttpRedirect (HttpServerTest):
|
|||
u"real url %s" % rurl,
|
||||
u"name Recursive Redirect",
|
||||
u"info 1 URL parsed.",
|
||||
u"warning Content with 45B is the same as in URLs (%s)." % nurl,
|
||||
u"valid",
|
||||
]
|
||||
self.direct(url, resultlines, recursionlevel=99)
|
||||
|
|
|
|||
Loading…
Reference in a new issue