Remove the duplicate URL content check.

This commit is contained in:
Bastian Kleineidam 2013-12-04 19:12:40 +01:00
parent 84dac60f57
commit 023da7c993
7 changed files with 3 additions and 28 deletions

View file

@ -7,6 +7,7 @@ Features:
Changes:
- checking: Always use the W3C validator to check HTML or CSS syntax.
- checking: Remove the http-wrong-redirect warning.
- checking: Remove the url-content-duplicate warning.
- cmdline: Replace argument parsing. No changes in functionality, only
the help text will be formatted differently.

View file

@ -82,7 +82,6 @@ WARN_URL_EFFECTIVE_URL = "url-effective-url"
WARN_URL_ERROR_GETTING_CONTENT = "url-error-getting-content"
WARN_URL_ANCHOR_NOT_FOUND = "url-anchor-not-found"
WARN_URL_WARNREGEX_FOUND = "url-warnregex-found"
WARN_URL_CONTENT_DUPLICATE = "url-content-duplicate"
WARN_URL_CONTENT_SIZE_TOO_LARGE = "url-content-too-large"
WARN_URL_CONTENT_SIZE_ZERO = "url-content-size-zero"
WARN_URL_CONTENT_SIZE_UNEQUAL = "url-content-size-unequal"
@ -119,7 +118,6 @@ Warnings = {
WARN_URL_ANCHOR_NOT_FOUND: _("URL anchor was not found."),
WARN_URL_WARNREGEX_FOUND:
_("The warning regular expression was found in the URL contents."),
WARN_URL_CONTENT_DUPLICATE: _("The URL content is a duplicate of another URL."),
WARN_URL_CONTENT_SIZE_TOO_LARGE: _("The URL content size is too large."),
WARN_URL_CONTENT_SIZE_ZERO: _("The URL content size is zero."),
WARN_URL_CONTENT_SIZE_UNEQUAL: _("The URL content size and download size are unequal."),

View file

@ -218,7 +218,6 @@ class FtpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport, pooledco
def stor_data (s):
"""Helper method storing given data"""
urls = self.aggregate.add_download_data(self.cache_content_key, s)
self.warn_duplicate_content(urls)
# limit the download size
if (buf.tell() + len(s)) > self.MaxFilesizeBytes:
raise LinkCheckerError(_("FTP file size too large"))

View file

@ -678,7 +678,6 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport, pooledc
raise LinkCheckerError(_("File size too large"))
dlsize = len(data)
urls = self.aggregate.add_download_data(self.cache_content_key, data)
self.warn_duplicate_content(urls)
encoding = headers.get_content_encoding(self.headers)
if encoding in SUPPORTED_ENCODINGS:
try:

View file

@ -40,8 +40,7 @@ from .const import (WARN_URL_EFFECTIVE_URL,
WARN_URL_CONTENT_SIZE_TOO_LARGE, WARN_URL_CONTENT_SIZE_ZERO,
WARN_URL_CONTENT_SIZE_UNEQUAL, WARN_URL_WHITESPACE,
WARN_URL_TOO_LONG, URL_MAX_LENGTH, URL_WARN_LENGTH,
WARN_URL_CONTENT_DUPLICATE, WARN_SYNTAX_HTML,
WARN_SYNTAX_CSS,
WARN_SYNTAX_HTML, WARN_SYNTAX_CSS,
ExcList, ExcSyntaxList, ExcNoCacheList)
# helper alias
@ -763,24 +762,8 @@ class UrlBase (object):
raise LinkCheckerError(_("File size too large"))
if not self.is_local():
urls = self.aggregate.add_download_data(self.cache_content_key, data)
self.warn_duplicate_content(urls)
return data, len(data)
def warn_duplicate_content(self, urls):
"""If given URL list is not empty, warn about duplicate URL content.
@param urls: URLs with duplicate content
@ptype urls: list of unicode
"""
if not urls or self.size <= 0:
return
if urlutil.is_duplicate_content_url(self.url, urls[0]):
return
args = dict(
urls=u",".join(urls),
size=_(" with %s") % strformat.strsize(self.size),
)
self.add_warning(_("Content%(size)s is the same as in URLs (%(urls)s).") % args, tag=WARN_URL_CONTENT_DUPLICATE)
def check_content (self):
"""Check content data for warnings, syntax errors, viruses etc."""
if not (self.valid and self.can_get_content()):

View file

@ -30,14 +30,12 @@ url /?d=directory&p=page1
cache key http://localhost:%(port)d/?d=directory&p=page1
real url http://localhost:%(port)d/?d=directory&p=page1
name should not be cached
warning Content with 309B is the same as in URLs (http://localhost:%(port)d/?d=directory&p=page).
valid
url /?q=ü
cache key http://localhost:%(port)d/?q=%%C3%%BC
real url http://localhost:%(port)d/?q=%%C3%%BC
name html entities
warning Content with 309B is the same as in URLs (http://localhost:%(port)d/?d=directory&p=page,http://localhost:%(port)d/?d=directory&p=page1).
valid
url file.css
@ -70,9 +68,7 @@ valid
url http://example.org/foo/ #a=1,2,3
cache key http://example.org/foo/%%20#a%%3D1%%2C2%%2C3
real url http://www.iana.org/domains/example#a%%3D1%%2C2%%2C3
info Redirected to `http://www.iana.org/domains/example/'.
info Redirected to `http://www.iana.org/domains/example'.
real url http://example.org/foo/%%20#a%%3D1%%2C2%%2C3
warning Anchor `a%%3D1%%2C2%%2C3' not found. Available anchors: -.
valid

View file

@ -63,7 +63,6 @@ class TestHttpRedirect (HttpServerTest):
u"real url %s" % rurl,
u"name Recursive Redirect",
u"info 1 URL parsed.",
u"warning Content with 45B is the same as in URLs (%s)." % nurl,
u"valid",
]
self.direct(url, resultlines, recursionlevel=99)