mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-01 05:30:26 +00:00
Refactor recursion checks.
This commit is contained in:
parent
08fbd891ef
commit
22caa9367a
2 changed files with 13 additions and 12 deletions
7
linkcheck/cache/robots_txt.py
vendored
7
linkcheck/cache/robots_txt.py
vendored
|
|
@ -69,12 +69,7 @@ class RobotsTxt (object):
|
|||
|
||||
def add_sitemap_urls(self, rp, url_data, roboturl):
|
||||
"""Add sitemap URLs to queue."""
|
||||
if not rp.sitemap_urls:
|
||||
return
|
||||
rec_level = url_data.aggregate.config["recursionlevel"]
|
||||
if rec_level >= 0 and url_data.recursion_level >= rec_level:
|
||||
return
|
||||
if url_data.extern[0]:
|
||||
if not rp.sitemap_urls or not url_data.allows_simple_recursion():
|
||||
return
|
||||
for sitemap_url, line in rp.sitemap_urls:
|
||||
url_data.add_url(sitemap_url, line=line)
|
||||
|
|
|
|||
|
|
@ -519,6 +519,17 @@ class UrlBase (object):
|
|||
maxbytes=strformat.strsize(maxbytes)),
|
||||
tag=WARN_URL_CONTENT_SIZE_TOO_LARGE)
|
||||
|
||||
def allows_simple_recursion(self):
    """Tell whether the basic recursion preconditions hold for this URL.

    Two checks are made, in this order:

    1. The configured ``recursionlevel`` limit: when it is non-negative
       and ``self.recursion_level`` has reached it, recursion is refused.
       A negative limit skips this check.
    2. The extern flag: when ``self.extern[0]`` is true, recursion is
       refused.

    The reason for a refusal is written to the debug log.

    @return: True if neither check refuses recursion, else False
    """
    limit = self.aggregate.config["recursionlevel"]
    refusal = None
    if limit >= 0 and self.recursion_level >= limit:
        refusal = "... no, maximum recursion level reached."
    elif self.extern[0]:
        # Only consulted when the depth check did not already refuse.
        refusal = "... no, extern."
    if refusal is not None:
        log.debug(LOG_CHECK, refusal)
        return False
    return True
|
||||
|
||||
def allows_recursion (self):
|
||||
"""
|
||||
Return True iff we can recurse into the url's content.
|
||||
|
|
@ -530,12 +541,7 @@ class UrlBase (object):
|
|||
if not self.can_get_content():
|
||||
log.debug(LOG_CHECK, "... no, cannot get content.")
|
||||
return False
|
||||
rec_level = self.aggregate.config["recursionlevel"]
|
||||
if rec_level >= 0 and self.recursion_level >= rec_level:
|
||||
log.debug(LOG_CHECK, "... no, maximum recursion level reached.")
|
||||
return False
|
||||
if self.extern[0]:
|
||||
log.debug(LOG_CHECK, "... no, extern.")
|
||||
if not self.allows_simple_recursion():
|
||||
return False
|
||||
if self.size > self.aggregate.config["maxfilesizeparse"]:
|
||||
log.debug(LOG_CHECK, "... no, maximum parse size.")
|
||||
|
|
|
|||
Loading…
Reference in a new issue