mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-20 20:31:53 +00:00
Warn about too long URLs.
This commit is contained in:
parent
03667a4ec9
commit
cb71f483a5
4 changed files with 80 additions and 7 deletions
|
|
@ -8,6 +8,8 @@ Changes:
|
|||
- doc: Mention 7-zip to extract the .tar.xz under Windows.
|
||||
Closes: SF bug #3564733
|
||||
- checking: Send an HTTP Do-Not-Track header.
|
||||
- checking: Check URL length. Print error on URL longer than 2000 characters,
|
||||
warning for longer than 255 characters.
|
||||
- logging: Print download and cache statistics in text output logger.
|
||||
- logging: Print warning tag in text output logger. Makes warning filtering
|
||||
easier.
|
||||
|
|
|
|||
|
|
@ -71,6 +71,11 @@ except ImportError:
|
|||
|
||||
ExcList = ExcCacheList + ExcNoCacheList
|
||||
|
||||
# URL length limits: above URL_WARN_LENGTH a warning is issued, above URL_MAX_LENGTH the URL is invalid
|
||||
URL_MAX_LENGTH = 2000
|
||||
URL_WARN_LENGTH = 255
|
||||
|
||||
# the warnings
|
||||
WARN_URL_EFFECTIVE_URL = "url-effective-url"
|
||||
WARN_URL_ERROR_GETTING_CONTENT = "url-error-getting-content"
|
||||
WARN_URL_ANCHOR_NOT_FOUND = "url-anchor-not-found"
|
||||
|
|
@ -79,6 +84,7 @@ WARN_URL_CONTENT_SIZE_TOO_LARGE = "url-content-too-large"
|
|||
WARN_URL_CONTENT_SIZE_ZERO = "url-content-size-zero"
|
||||
WARN_URL_CONTENT_SIZE_UNEQUAL = "url-content-size-unequal"
|
||||
WARN_URL_OBFUSCATED_IP = "url-obfuscated-ip"
|
||||
WARN_URL_TOO_LONG = "url-too-long"
|
||||
WARN_URL_WHITESPACE = "url-whitespace"
|
||||
WARN_FILE_MISSING_SLASH = "file-missing-slash"
|
||||
WARN_FILE_SYSTEM_PATH = "file-system-path"
|
||||
|
|
@ -112,6 +118,7 @@ Warnings = {
|
|||
WARN_URL_CONTENT_SIZE_TOO_LARGE: _("The URL content size is too large."),
|
||||
WARN_URL_CONTENT_SIZE_ZERO: _("The URL content size is zero."),
|
||||
WARN_URL_CONTENT_SIZE_UNEQUAL: _("The URL content size and download size are unequal."),
|
||||
WARN_URL_TOO_LONG: _("The URL is longer than the recommended size."),
|
||||
WARN_URL_WHITESPACE: _("The URL contains leading or trailing whitespace."),
|
||||
WARN_FILE_MISSING_SLASH: _("The file: URL is missing a trailing slash."),
|
||||
WARN_FILE_SYSTEM_PATH:
|
||||
|
|
|
|||
|
|
@ -40,6 +40,7 @@ from .const import (WARN_URL_EFFECTIVE_URL,
|
|||
WARN_URL_ANCHOR_NOT_FOUND, WARN_URL_WARNREGEX_FOUND,
|
||||
WARN_URL_CONTENT_SIZE_TOO_LARGE, WARN_URL_CONTENT_SIZE_ZERO,
|
||||
WARN_URL_CONTENT_SIZE_UNEQUAL, WARN_URL_WHITESPACE,
|
||||
WARN_URL_TOO_LONG, URL_MAX_LENGTH, URL_WARN_LENGTH,
|
||||
ExcList, ExcSyntaxList, ExcNoCacheList)
|
||||
|
||||
# helper alias
|
||||
|
|
@ -389,19 +390,29 @@ class UrlBase (object):
|
|||
return
|
||||
try:
|
||||
self.build_url()
|
||||
# check url warnings
|
||||
effectiveurl = urlutil.urlunsplit(self.urlparts)
|
||||
if self.url != effectiveurl:
|
||||
self.add_warning(_("Effective URL %(url)r.") %
|
||||
{"url": effectiveurl},
|
||||
tag=WARN_URL_EFFECTIVE_URL)
|
||||
self.url = effectiveurl
|
||||
self.check_url_warnings()
|
||||
except tuple(ExcSyntaxList), msg:
|
||||
self.set_result(unicode_safe(msg), valid=False)
|
||||
else:
|
||||
self.set_cache_keys()
|
||||
self.set_extern(self.url)
|
||||
|
||||
def check_url_warnings(self):
    """Warn about a differing effective URL and check the URL length.

    The URL rebuilt from self.urlparts is compared with self.url; when
    they differ a WARN_URL_EFFECTIVE_URL warning is added and self.url
    is replaced by the rebuilt URL. Afterwards the length of self.url
    is checked: longer than URL_MAX_LENGTH sets an invalid result,
    otherwise longer than URL_WARN_LENGTH adds a WARN_URL_TOO_LONG
    warning.
    """
    rebuilt = urlutil.urlunsplit(self.urlparts)
    if self.url != rebuilt:
        notice = _("Effective URL %(url)r.") % {"url": rebuilt}
        self.add_warning(notice, tag=WARN_URL_EFFECTIVE_URL)
        self.url = rebuilt
    # Measure after the assignment above so the effective URL is checked.
    url_length = len(self.url)
    if url_length > URL_MAX_LENGTH:
        values = {"len": url_length, "max": URL_MAX_LENGTH}
        error = _("URL length %(len)d is longer than maximum of %(max)d.") % values
        self.set_result(error, valid=False)
    elif url_length > URL_WARN_LENGTH:
        values = {"len": url_length, "warn": URL_WARN_LENGTH}
        notice = _("URL length %(len)d is longer than %(warn)d.") % values
        self.add_warning(notice, tag=WARN_URL_TOO_LONG)
|
||||
|
||||
def build_url (self):
|
||||
"""
|
||||
Construct self.url and self.urlparts out of the given base
|
||||
|
|
|
|||
53
tests/checker/test_urllen.py
Normal file
53
tests/checker/test_urllen.py
Normal file
|
|
@ -0,0 +1,53 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2012 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
"""
|
||||
Test URL length checks.
|
||||
"""
|
||||
from . import LinkCheckTest
|
||||
|
||||
|
||||
class TestURLLength(LinkCheckTest):
    """
    Check how over-long URLs are reported: a warning for a URL of
    279 characters and an error for a URL of 2023 characters.
    """

    def test_url_warn(self):
        # 23 characters of prefix plus 256 path characters give the
        # 279 quoted in the expected warning line.
        url = u"http://www.example.org/%s" % (u"a" * 256)
        attrs = self.get_attrs(url=url)
        attrs['nurl'] = u"http://www.iana.org/domains/example/"
        templates = (
            u"url %(url)s",
            u"cache key %(url)s",
            u"real url %(nurl)s",
            u"info Redirected to `%(nurl)s'.",
        )
        expected = [template % attrs for template in templates]
        expected.append(u"warning URL length 279 is longer than 255.")
        expected.append(u"valid")
        self.direct(url, expected)

    def test_url_error(self):
        # 23 characters of prefix plus 2000 path characters exceed the
        # 2000 character maximum, so only an error is expected.
        url = u"http://www.example.org/%s" % ("a" * 2000)
        attrs = self.get_attrs(url=url)
        attrs['nurl'] = self.norm(url)
        templates = (
            u"url %(nurl)s",
            u"cache key %(nurl)s",
            u"real url %(nurl)s",
        )
        expected = [template % attrs for template in templates]
        expected.append(u"error")
        self.direct(url, expected)
|
||||
|
||||
Loading…
Reference in a new issue