Fix URL result caching.

parent 0113f06406
commit 6b334dc79b

18 changed files with 151 additions and 121 deletions
doc/changes.txt

@@ -7,7 +7,8 @@ Features:
   Closes: GH bug #467

 Fixes:
-- checking: Fix internal error in debug output.
+- checking: Fix internal errors in debug output.
+  Closes: GH bug #472


 9.0 "The Wolf of Wall Street" (released 3.3.2014)
linkcheck/cache/results.py (18 changes)
@@ -33,28 +33,28 @@ class ResultCache(object):
     format: {cache key (string) -> result (UrlData.towire())}
     """

-    def __init__(self, max_size=10000):
+    def __init__(self, max_size=100000):
         """Initialize result cache."""
         # mapping {URL -> cached result}
         self.cache = {}
         self.max_size = max_size
-        self.hits = self.misses = 0

     @synchronized(cache_lock)
     def get_result(self, key):
         """Return cached result or None if not found."""
-        if key in self.cache:
-            self.hits += 1
-            return self.cache[key]
-        self.misses += 1
-        return None
+        return self.cache.get(key)

     @synchronized(cache_lock)
     def add_result(self, key, result):
         """Add result object to cache with given key.
-        The request is ignored when the cache is already full.
+        The request is ignored when the cache is already full or the key
+        is None.
         """
         if len(self.cache) > self.max_size:
             return
-        self.cache[key] = result
+        if key is not None:
+            self.cache[key] = result

+    def has_result(self, key):
+        """Non-thread-safe function for fast containment checks."""
+        return key in self.cache
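The commit turns ResultCache into a plain mapping from cache URL to serialized result: get_result() collapses to dict.get(), the hit/miss counters go away, and add_result() silently drops None keys (URLs that never received a cache URL). Below is a minimal standalone sketch of the class after this change; the synchronized decorator is a simplified stand-in for the one in linkcheck.decorators:

    import threading

    cache_lock = threading.RLock()

    def synchronized(lock):
        """Simplified stand-in for the decorator in linkcheck.decorators."""
        def decorator(func):
            def wrapper(*args, **kwargs):
                with lock:
                    return func(*args, **kwargs)
            return wrapper
        return decorator

    class ResultCache(object):
        """Map cache URL -> serialized check result, as in the diff above."""

        def __init__(self, max_size=100000):
            self.cache = {}
            self.max_size = max_size

        @synchronized(cache_lock)
        def get_result(self, key):
            # dict.get() returns None for missing keys, so the old
            # hit/miss bookkeeping is no longer needed.
            return self.cache.get(key)

        @synchronized(cache_lock)
        def add_result(self, key, result):
            # Ignore the request when the cache is full or the key is None
            # (URLs that never received a cache URL).
            if len(self.cache) > self.max_size:
                return
            if key is not None:
                self.cache[key] = result

        def has_result(self, key):
            # Deliberately unsynchronized: a fast containment check.
            return key in self.cache

    cache = ResultCache()
    cache.add_result(u"http://example.com/", "wire-result")
    cache.add_result(None, "dropped")            # silently ignored
    assert cache.get_result(u"http://example.com/") == "wire-result"
    assert cache.get_result(u"http://other.example/") is None
    assert not cache.has_result(None)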

linkcheck/cache/robots_txt.py (4 changes)
@@ -80,9 +80,7 @@ class RobotsTxt (object):
                 url_data.recursion_level+1, url_data.aggregate,
                 parent_url=roboturl, line=line,
                 parent_content_type=url_data.content_type)
-            if sitemap_url_data.has_result or not sitemap_url_data.extern[1]:
-                # Only queue URLs which have a result or are not strict extern.
-                url_data.aggregate.urlqueue.put(sitemap_url_data)
+            url_data.aggregate.urlqueue.put(sitemap_url_data)

     @synchronized(robot_lock)
     def get_lock(self, roboturl):

linkcheck/cache/urlqueue.py (36 changes)
@@ -53,7 +53,6 @@ class UrlQueue (object):
         self.unfinished_tasks = 0
         self.finished_tasks = 0
         self.in_progress = 0
-        self.seen = set()
         self.shutdown = False
         # Each put() decreases the number of allowed puts.
         # This way we can restrict the number of URLs that are checked.

@@ -107,8 +106,6 @@ class UrlQueue (object):
         """Put an item into the queue.
         Block if necessary until a free slot is available.
         """
-        if self.put_denied(item):
-            return
         with self.mutex:
             self._put(item)
             self.not_empty.notify()

@@ -117,35 +114,23 @@
         """Determine if put() will not append the item on the queue.
         @return True (reliable) or False (unreliable)
         """
-        if self.shutdown or self.allowed_puts == 0:
-            return True
-        if url_data.cache_key is not None and url_data.cache_key in self.seen:
-            return True
-        return False
+        return self.shutdown or self.allowed_puts == 0

     def _put (self, url_data):
         """Put URL in queue, increase number of unfished tasks."""
-        if self.shutdown:
-            # don't accept more URLs
+        if self.put_denied(url_data):
             return
         if self.allowed_puts is not None:
-            if self.allowed_puts == 0:
-                # no more puts allowed
-                return
             self.allowed_puts -= 1
-        log.debug(LOG_CACHE, "queueing %s", url_data)
-        key = url_data.cache_key
-        if key is not None:
-            if key in self.seen:
-                # don't check duplicate URLs
-                return
-            self.seen.add(key)
-        self.unfinished_tasks += 1
-        if url_data.has_result or \
-                (key and key[1] in url_data.aggregate.result_cache.cache):
+        log.debug(LOG_CACHE, "queueing %s", url_data.url)
+        key = url_data.cache_url
+        cache = url_data.aggregate.result_cache
+        if url_data.has_result or cache.has_result(key):
             self.queue.appendleft(url_data)
         else:
+            assert key is not None, "no result for None key: %s" % url_data
             self.queue.append(url_data)
+        self.unfinished_tasks += 1

     def task_done (self, url_data):
         """

@@ -163,10 +148,7 @@ class UrlQueue (object):
         placed in the queue.
         """
         with self.all_tasks_done:
-            log.debug(LOG_CACHE, "task_done %s", url_data)
-            # check for aliases (eg. through HTTP redirections)
-            if hasattr(url_data, "aliases") and url_data.aliases:
-                self.seen.update(url_data.aliases)
+            log.debug(LOG_CACHE, "task_done %s", url_data.url)
             self.finished_tasks += 1
             self.unfinished_tasks -= 1
             self.in_progress -= 1
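With the seen set gone, the queue no longer drops duplicate URLs. Instead, _put() sends anything that already has a result (its own, or one in the result cache) to the front of the deque, where it can be logged cheaply, while fresh URLs go to the back for real checking. A toy model of that ordering (MiniQueue and the string items are invented for illustration):

    from collections import deque

    class MiniQueue(object):
        """Toy model: items with a known result jump to the front."""

        def __init__(self):
            self.queue = deque()

        def put(self, item, has_cached_result):
            if has_cached_result:
                # Cheap log-and-done work is served first.
                self.queue.appendleft(item)
            else:
                # Real network checks wait at the back.
                self.queue.append(item)

    q = MiniQueue()
    q.put("fresh-url", has_cached_result=False)
    q.put("repeated-url", has_cached_result=True)
    assert list(q.queue) == ["repeated-url", "fresh-url"]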

linkcheck/checker/httpurl.py

@@ -44,8 +44,6 @@ class HttpUrl (internpaturl.InternPatternUrl, proxysupport.ProxySupport):
         Initialize HTTP specific variables.
         """
         super(HttpUrl, self).reset()
-        # URLs seen through redirections
-        self.aliases = []
         # initialize check data
         self.headers = {}
         self.auth = None

linkcheck/checker/mailtourl.py

@@ -288,13 +288,12 @@ class MailtoUrl (urlbase.UrlBase):
             pass
         self.set_result(_("Valid mail address syntax"))

-    def set_cache_key(self):
+    def set_cache_url(self):
         """
-        The cache key is a comma separated list of emails.
+        The cache url is a comma separated list of emails.
         """
         emails = u",".join(sorted(self.addresses))
-        cache_url = u"%s:%s" % (self.scheme, emails)
-        self.cache_key = (self.parent_url, cache_url)
+        self.cache_url = u"%s:%s" % (self.scheme, emails)

     def can_get_content (self):
         """
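Since the cache key no longer carries the parent URL, a mailto link is now cached purely by its sorted address list, so the same recipients in a different order share one cache entry. A small illustration with made-up addresses:

    addresses = set([u"bob@example.com", u"alice@example.com"])
    cache_url = u"%s:%s" % (u"mailto", u",".join(sorted(addresses)))
    assert cache_url == u"mailto:alice@example.com,bob@example.com"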

linkcheck/checker/urlbase.py
@@ -29,8 +29,8 @@ import select
 from cStringIO import StringIO

 from . import absolute_url, get_url_from
-from .. import (log, LOG_CHECK, LOG_CACHE,
-    strformat, LinkCheckerError, url as urlutil, trace, get_link_pat, parser)
+from .. import (log, LOG_CHECK,
+    strformat, LinkCheckerError, url as urlutil, trace, get_link_pat)
 from ..HtmlParser import htmlsax
 from ..htmlutil import linkparse
 from ..network import iputil
@@ -182,8 +182,8 @@ class UrlBase (object):
         self.url_connection = None
         # data of url content, (data == None) means no data is available
         self.data = None
-        # cache key is set by build_url() calling set_cache_key()
-        self.cache_key = None
+        # cache url is set by build_url() calling set_cache_url()
+        self.cache_url = None
         # extern flags (is_extern, is_strict)
         self.extern = None
         # flag if the result should be cached
@@ -194,6 +194,8 @@
         self.do_check_content = True
         # MIME content type
         self.content_type = u""
+        # URLs seen through redirections
+        self.aliases = []

     def set_result (self, msg, valid=True, overwrite=False):
         """
@@ -282,14 +284,11 @@
         if s not in self.info:
             self.info.append(s)

-    def set_cache_key (self):
-        """Set key for URL cache checking. A cache key consists of
-        a tuple (source url, target url)."""
+    def set_cache_url (self):
+        """Set the URL to be used for caching."""
         # remove anchor from cached target url since we assume
         # URLs with different anchors to have the same content
-        cache_url = urlutil.urlunsplit(self.urlparts[:4]+[u''])
-        self.cache_key = (self.parent_url, cache_url)
-        log.debug(LOG_CACHE, "URL cache key %r", self.cache_key)
+        self.cache_url = urlutil.urlunsplit(self.urlparts[:4]+[u''])

     def check_syntax (self):
         """
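set_cache_url() keeps only the first four parts of the split URL (scheme, host, path, query) and blanks the fragment, so targets that differ only in their anchor share one cache entry. The same effect with the standard library in place of linkcheck's urlutil helper:

    from urlparse import urlsplit, urlunsplit  # Python 2, like the codebase

    def cache_url_for(url):
        # Keep scheme, host, path and query; blank the fragment.
        return urlunsplit(list(urlsplit(url))[:4] + [u''])

    assert cache_url_for(u"http://example.com/page.html#intro") == \
           cache_url_for(u"http://example.com/page.html#usage")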
@@ -311,7 +310,7 @@
         except tuple(ExcSyntaxList) as msg:
             self.set_result(unicode_safe(msg), valid=False)
         else:
-            self.set_cache_key()
+            self.set_cache_url()

     def check_url_warnings(self):
         """Check URL name and length."""
@@ -414,15 +413,10 @@
                     raise KeyboardInterrupt(value)
                 else:
                     raise
-        finally:
-            # close/release possible open connection
-            self.close_connection()

     def local_check (self):
         """Local check function can be overridden in subclasses."""
-        log.debug(LOG_CHECK, "Checking %s", self)
-        # start time for check
-        check_start = time.time()
+        log.debug(LOG_CHECK, "Checking %s", unicode(self))
         # strict extern URLs should not be checked
         assert not self.extern[1], 'checking strict extern URL'
         # check connection
@@ -441,18 +435,23 @@
                 # idna.encode(host) failed
                 value = _('Bad hostname %(host)r: %(msg)s') % {'host': self.host, 'msg': str(value)}
             self.set_result(unicode_safe(value), valid=False)
-        if self.do_check_content:
+
+    def check_content(self):
+        """Check content of URL.
+        @return: True if content can be parsed, else False
+        """
+        if self.do_check_content and self.valid:
             # check content and recursion
             try:
-                if self.valid and self.can_get_content():
+                if self.can_get_content():
                     self.aggregate.plugin_manager.run_content_plugins(self)
                 if self.allows_recursion():
-                    parser.parse_url(self)
+                    return True
             except tuple(ExcList):
                 value = self.handle_exception()
                 self.add_warning(_("could not get content: %(msg)s") %
                     {"msg": str(value)}, tag=WARN_URL_ERROR_GETTING_CONTENT)
-        self.checktime = time.time() - check_start
+        return False

     def close_connection (self):
         """
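check() now stops after the connection check. Content handling lives in check_content(), which reports through its return value whether the caller should parse the content, and the timing that used to end check() moves to the caller as well. A hypothetical driver showing the new contract (DummyUrlData is invented for the sketch):

    import time

    class DummyUrlData(object):
        """Invented stand-in showing the new calling convention."""

        def __init__(self):
            self.valid = True
            self.do_check_content = True
            self.checktime = None

        def check(self):
            pass  # connection checking only

        def check_content(self):
            # True means: caller should run the parser on this URL's content.
            return self.do_check_content and self.valid

    url_data = DummyUrlData()
    check_start = time.time()
    url_data.check()
    do_parse = url_data.check_content()
    url_data.checktime = time.time() - check_start  # timing moved to the caller
    assert do_parse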
@@ -648,7 +647,7 @@
         return self.aggregate.config.get_user_password(self.url)

     def add_url (self, url, line=0, column=0, name=u"", base=None):
-        """Queue URL data for checking."""
+        """Add new URL to queue."""
         if base:
             base_ref = urlutil.url_norm(base)[0]
         else:
@@ -656,9 +655,7 @@
         url_data = get_url_from(url, self.recursion_level+1, self.aggregate,
             parent_url=self.url, base_ref=base_ref, line=line, column=column,
             name=name, parent_content_type=self.content_type)
-        if url_data.has_result or not url_data.extern[1]:
-            # Only queue URLs which have a result or are not strict extern.
-            self.aggregate.urlqueue.put(url_data)
+        self.aggregate.urlqueue.put(url_data)

     def serialized (self, sep=os.linesep):
         """
@@ -674,11 +671,8 @@
         assert isinstance(self.name, unicode), repr(self.name)
         if self.anchor is not None:
             assert isinstance(self.anchor, unicode), repr(self.anchor)
-        if self.cache_key is not None:
-            if self.cache_key[0] is not None:
-                assert isinstance(self.cache_key[0], unicode), repr(self.cache_key[0])
-            if self.cache_key[1] is not None:
-                assert isinstance(self.cache_key[1], unicode), repr(self.cache_key[1])
+        if self.cache_url is not None:
+            assert isinstance(self.cache_url, unicode), repr(self.cache_url)
         return sep.join([
             u"%s link" % self.scheme,
             u"base_url=%r" % self.base_url,
@@ -690,7 +684,7 @@
             u"column=%d" % self.column,
             u"name=%r" % self.name,
             u"anchor=%r" % self.anchor,
-            u"cache_key=%s" % repr(self.cache_key),
+            u"cache_url=%s" % self.cache_url,
         ])

     def get_intern_pattern (self, url=None):
@@ -715,14 +709,23 @@
               {"domain": msg}
             self.set_result(res, valid=False)

-    def __str__ (self):
+    def __unicode__(self):
         """
         Get URL info.

+        @return: URL info
+        @rtype: unicode
+        """
+        return self.serialized()
+
+    def __str__(self):
+        """
+        Get URL info.
+
         @return: URL info, encoded with the output logger encoding
         @rtype: string
         """
-        s = self.serialized()
+        s = unicode(self)
         return self.aggregate.config['logger'].encode(s)

     def __repr__ (self):
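This is the usual Python 2 split: __unicode__ produces the text and __str__ encodes it, here through the configured logger's encoding. The idiom in isolation, with a made-up class:

    class Greeter(object):
        """Made-up class showing the idiom."""

        def __unicode__(self):
            return u"gr\xfc\xdf dich"

        def __str__(self):
            # Encode the text form; UrlBase delegates this to the logger.
            return unicode(self).encode("utf-8")

    g = Greeter()
    assert isinstance(unicode(g), unicode)
    assert isinstance(str(g), str)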
@@ -766,8 +769,8 @@
             Line number of this URL at parent document, or -1
           - url_data.column: int
             Column number of this URL at parent document, or -1
-          - url_data.cache_key: unicode
-            Cache key for this URL.
+          - url_data.cache_url: unicode
+            Cache url for this URL.
           - url_data.content_type: unicode
             MIME content type for URL content.
           - url_data.level: int
@@ -792,7 +795,7 @@
             info=self.info,
             line=self.line,
             column=self.column,
-            cache_key=self.cache_key,
+            cache_url=self.cache_url,
             content_type=self.content_type,
             level=self.recursion_level,
             modified=self.modified,
@@ -823,7 +826,7 @@ urlDataAttr = [
     'modified',
     'line',
     'column',
-    'cache_key',
+    'cache_url',
     'content_type',
     'level',
 ]

linkcheck/director/aggregator.py
@@ -81,7 +81,7 @@ class Aggregate (object):
                 t.start()
         else:
             self.request_sessions[thread.get_ident()] = new_request_session(self.config)
-            checker.check_url(self.urlqueue, self.logger)
+            checker.check_urls(self.urlqueue, self.logger)

     @synchronized(_threads_lock)
     def add_request_session(self):

linkcheck/director/checker.py
@@ -18,23 +18,63 @@
 URL checking functions.
 """
 import copy
+import time
 from . import task
 from ..cache import urlqueue
+from .. import parser


-def check_url (urlqueue, logger):
+def check_urls (urlqueue, logger):
     """Check URLs without threading."""
     while not urlqueue.empty():
         url_data = urlqueue.get()
         try:
-            if not url_data.has_result:
-                url_data.check()
-            logger.log_url(url_data.to_wire())
+            check_url(url_data, logger)
         finally:
             urlqueue.task_done(url_data)


-class Checker (task.LoggedCheckedTask):
+def check_url(url_data, logger):
+    """Check a single URL with logging."""
+    if url_data.has_result:
+        logger.log_url(url_data.to_wire())
+    else:
+        cache = url_data.aggregate.result_cache
+        key = url_data.cache_url
+        result = cache.get_result(key)
+        if result is None:
+            # check
+            check_start = time.time()
+            try:
+                url_data.check()
+                do_parse = url_data.check_content()
+                url_data.checktime = time.time() - check_start
+                # Add result to cache
+                result = url_data.to_wire()
+                cache.add_result(key, result)
+                for alias in url_data.aliases:
+                    # redirect aliases
+                    cache.add_result(alias, result)
+                # parse content recursively
+                if do_parse:
+                    parser.parse_url(url_data)
+            finally:
+                # close/release possible open connection
+                url_data.close_connection()
+        else:
+            # copy data from cache and adjust it
+            result = copy.copy(result)
+            result.parent_url = url_data.parent_url
+            result.base_ref = url_data.base_ref or u""
+            result.base_url = url_data.base_url or u""
+            result.line = url_data.line
+            result.column = url_data.column
+            result.level = url_data.recursion_level
+            result.name = url_data.name
+        logger.log_url(result)
+
+
+class Checker(task.LoggedCheckedTask):
     """URL check thread."""

     def __init__ (self, urlqueue, logger, add_request_session):
@@ -71,17 +111,4 @@ class Checker (task.LoggedCheckedTask):
         else:
             url = url_data.url.encode("ascii", "replace")
         self.setName("CheckThread-%s" % url)
-        if url_data.has_result:
-            self.logger.log_url(url_data.to_wire())
-        else:
-            cache = url_data.aggregate.result_cache
-            key = url_data.cache_key[1]
-            result = cache.get_result(key)
-            if result is None:
-                url_data.check()
-                result = url_data.to_wire()
-                cache.add_result(key, result)
-            else:
-                result = copy.copy(result)
-                result.parent_url = url_data.parent_url
-            self.logger.log_url(result)
+        check_url(url_data, self.logger)
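check_url() is where the cache pays off: a fresh URL is checked, serialized with to_wire() and stored under its cache URL (plus any redirect aliases), while a repeated URL gets a shallow copy of the cached record with only the occurrence-specific fields (parent, position, level, name) rewritten before logging. A self-contained sketch of that copy-and-adjust step, with Result standing in for the wire format:

    import copy

    class Result(object):
        """Invented record standing in for UrlData.to_wire() output."""

        def __init__(self, url, valid):
            self.url = url
            self.valid = valid
            self.parent_url = None
            self.name = u""
            self.line = self.column = self.level = 0

    cached = Result(u"http://example.com/", valid=True)

    # Second occurrence of the same URL: shallow-copy the cached record
    # and rewrite only the fields that belong to this occurrence.
    result = copy.copy(cached)
    result.parent_url = u"http://example.com/other.html"
    result.line, result.column, result.level = 12, 8, 1
    result.name = u"example link"

    assert result.valid is cached.valid  # verdict is shared
    assert cached.line == 0              # original record is untouched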

linkcheck/logger/blacklist.py
@@ -55,7 +55,8 @@ class BlacklistLogger (_Logger):
         """
         Put invalid url in blacklist, delete valid url from blacklist.
         """
-        key = url_data.cache_key
+        key = (url_data.parent_url, url_data.cache_url)
+        key = repr(key)
         if key in self.blacklist:
             if url_data.valid:
                 del self.blacklist[key]
@@ -90,7 +91,7 @@
         """
         oldmask = os.umask(0077)
         for key, value in self.blacklist.items():
-            self.write(u"%d %s%s" % (value, key, os.linesep))
+            self.write(u"%d %s%s" % (value, repr(key), os.linesep))
         self.close_fileoutput()
         # restore umask
         os.umask(oldmask)
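The blacklist logger keeps the old (parent URL, target URL) pairing by rebuilding the tuple from parent_url and cache_url, and repr() turns it into a stable string for both the in-memory dict and the lines written to the blacklist file. For example (URLs invented):

    key = repr((u"http://example.com/", u"http://example.com/page.html"))
    # key == "(u'http://example.com/', u'http://example.com/page.html')"
    # A plain string survives the round trip through the blacklist file.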

tests/checker/__init__.py
@@ -67,7 +67,7 @@ class TestLogger (linkcheck.logger._Logger):
             url = u"url %s" % url_data.base_url
             self.result.append(url)
         if self.has_part('cachekey'):
-            cache_key = url_data.cache_key[1] if url_data.cache_key else None
+            cache_key = url_data.cache_url if url_data.cache_url else None
             self.result.append(u"cache key %s" % cache_key)
         if self.has_part('realurl'):
             self.result.append(u"real url %s" % url_data.url)

tests/checker/data/a b/bl.html (deleted)
@@ -1,2 +0,0 @@
-<a href="bl.html">link</a>
-<a href="el.html">External link</a>
@@ -3,6 +3,11 @@ cache key http://localhost:%(port)d/%(datadir)s/http.html
 real url http://localhost:%(port)d/%(datadir)s/http.html
 valid

+url
+cache key http://localhost:%(port)d/%(datadir)s/http.html
+real url http://localhost:%(port)d/%(datadir)s/http.html
+valid
+
 url dns://www.example.org
 cache key dns://www.example.org
 real url dns://www.example.org
@@ -37,11 +42,6 @@ real url http://localhost:%(port)d/?q=%%C3%%BC
 name html entities
 valid

-url
-cache key http://localhost:%(port)d/tests/checker/data/http.html
-real url http://localhost:%(port)d/tests/checker/data/http.html
-valid
-
 url file.css
 cache key http://localhost:%(port)d/tests/checker/data/file.css
 real url http://localhost:%(port)d/tests/checker/data/file.css
@@ -9,3 +9,8 @@ real url http://www.example.com/
 name l1
 valid

+url HTTP://WWW.EXAMPLE.COM/
+cache key http://www.example.com/
+real url http://www.example.com/
+name l2
+valid
@@ -39,3 +39,8 @@ real url http://www.example.com/
 name ok example
 valid

+url //www.example.com/
+cache key http://www.example.com/
+real url http://www.example.com/
+name no scheme example
+valid
@@ -19,6 +19,11 @@ cache key file://%(curdir)s/%(datadir)s/favicon.ico
 real url file://%(curdir)s/%(datadir)s/favicon.ico
 valid

+url favicon.ico
+cache key file://%(curdir)s/%(datadir)s/favicon.ico
+real url file://%(curdir)s/%(datadir)s/favicon.ico
+valid
+
 url test.swf
 cache key file://%(curdir)s/%(datadir)s/test.swf
 real url file://%(curdir)s/%(datadir)s/test.swf

tests/checker/test_file.py
@@ -134,7 +134,7 @@ class TestFile (LinkCheckTest):
         self.direct(url, resultlines)

     def test_good_dir_space (self):
-        url = u"file://%(curdir)s/%(datadir)s/a b/bl.html" % self.get_attrs()
+        url = u"file://%(curdir)s/%(datadir)s/a b/" % self.get_attrs()
         nurl = self.norm(url)
         url2 = u"file://%(curdir)s/%(datadir)s/a b/el.html" % self.get_attrs()
         nurl2 = self.norm(url2)
@@ -145,15 +145,15 @@
             u"cache key %s" % nurl,
             u"real url %s" % nurl,
             u"valid",
-            u"url bl.html",
-            u"cache key %s" % nurl,
-            u"real url %s" % nurl,
-            u"name link",
-            u"valid",
             u"url el.html",
             u"cache key %s" % nurl2,
             u"real url %s" % nurl2,
-            u"name External link",
+            u"name el.html",
             u"valid",
+            u"url t.txt",
+            u"cache key %s" % nurl3,
+            u"real url %s" % nurl3,
+            u"name t.txt",
+            u"valid",
             u"url t.txt",
             u"cache key %s" % nurl3,
@@ -58,9 +58,11 @@ class TestHttpRedirect (HttpServerTest):
             u"info Redirected to `%s'." % rurl,
             u"valid",
             u"url newurl.html",
-            u"cache key %s" % rurl,
+            u"cache key %s" % nurl,
             u"real url %s" % rurl,
             u"name Recursive Redirect",
+            # XXX the info is copied from the cached result
+            u"info Redirected to `%s'." % rurl,
             u"valid",
         ]
         self.direct(url, resultlines, recursionlevel=99)
@@ -103,3 +105,9 @@
             u"error",
         ]
         self.direct(url, resultlines, recursionlevel=99)
+
+    def redirect6(self):
+        #max_redirect = 10
+        # url = "http://httpbin.org/redirect/" + max_redirect --> valid
+        # url = "http://httpbin.org/redirect/" + (max_redirect+1) --> error
+        pass # XXX