Log robots.txt as the sitemap parent URL

robots.txt is the location the sitemap URL was found in, and the line
number being reported is the line in robots.txt, so robots.txt is now
logged as the parent URL of the sitemap.
This commit is contained in:
Chris Mayo 2022-10-17 19:21:03 +01:00
parent 7367e6e865
commit eab2fa410e
2 changed files with 3 additions and 3 deletions

View file

@ -77,7 +77,7 @@ class RobotsTxt:
log.warn(LOG_CACHE, _("Relative Sitemap %s in %s discarded"),
sitemap_url, roboturl)
continue
url_data.add_url(sitemap_url, line=line)
url_data.add_url(sitemap_url, line=line, parent=roboturl)
@synchronized(robot_lock)
def get_lock(self, roboturl):

View file

@ -793,7 +793,7 @@ class UrlBase:
return (split.username, split.password)
return self.aggregate.config.get_user_password(self.url)
def add_url(self, url, line=0, column=0, page=0, name="", base=None):
def add_url(self, url, line=0, column=0, page=0, name="", base=None, parent=None):
"""Add new URL to queue."""
if base:
base_ref = urlutil.url_norm(base, encoding=self.content_encoding)[0]
@ -803,7 +803,7 @@ class UrlBase:
url,
self.recursion_level + 1,
self.aggregate,
parent_url=self.url,
parent_url=self.url if parent is None else parent,
base_ref=base_ref,
line=line,
column=column,