mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-16 22:10:26 +00:00
Log robots.txt as the sitemap parent URL
This is the location the sitemap URL was found in. The line being reported is the line in robots.txt.
This commit is contained in:
parent
7367e6e865
commit
eab2fa410e
2 changed files with 3 additions and 3 deletions
2
linkcheck/cache/robots_txt.py
vendored
2
linkcheck/cache/robots_txt.py
vendored
|
|
@@ -77,7 +77,7 @@ class RobotsTxt:
|
|||
log.warn(LOG_CACHE, _("Relative Sitemap %s in %s discarded"),
|
||||
sitemap_url, roboturl)
|
||||
continue
|
||||
url_data.add_url(sitemap_url, line=line)
|
||||
url_data.add_url(sitemap_url, line=line, parent=roboturl)
|
||||
|
||||
@synchronized(robot_lock)
|
||||
def get_lock(self, roboturl):
|
||||
|
|
|
|||
|
|
@@ -793,7 +793,7 @@ class UrlBase:
|
|||
return (split.username, split.password)
|
||||
return self.aggregate.config.get_user_password(self.url)
|
||||
|
||||
def add_url(self, url, line=0, column=0, page=0, name="", base=None):
|
||||
def add_url(self, url, line=0, column=0, page=0, name="", base=None, parent=None):
|
||||
"""Add new URL to queue."""
|
||||
if base:
|
||||
base_ref = urlutil.url_norm(base, encoding=self.content_encoding)[0]
|
||||
|
|
@@ -803,7 +803,7 @@ class UrlBase:
|
|||
url,
|
||||
self.recursion_level + 1,
|
||||
self.aggregate,
|
||||
parent_url=self.url,
|
||||
parent_url=self.url if parent is None else parent,
|
||||
base_ref=base_ref,
|
||||
line=line,
|
||||
column=column,
|
||||
|
|
|
|||
Loading…
Reference in a new issue