From 7ba40537106f73acaf8dda852ce5f40eca3d027a Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Fri, 7 Aug 2020 20:04:23 +0100 Subject: [PATCH 1/2] Fix critical exception if srcset value ends with a comma Log a debug message, as this is a minor syntax problem that won't stop LinkChecker parsing strings up to the comma. --- linkcheck/htmlutil/linkparse.py | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/linkcheck/htmlutil/linkparse.py b/linkcheck/htmlutil/linkparse.py index 0e306ff3..682c2e20 100644 --- a/linkcheck/htmlutil/linkparse.py +++ b/linkcheck/htmlutil/linkparse.py @@ -201,8 +201,17 @@ class LinkFinder: self.found_url(url, name, base, lineno, column) elif attr == 'srcset': for img_candidate in value.split(','): - url = img_candidate.split()[0] - self.found_url(url, name, base, lineno, column) + try: + url = img_candidate.split()[0] + except IndexError: + log.debug( + LOG_CHECK, + _("trailing comma in line: " + "%(line)s srcset attribute: %(value)s") + % {"line": lineno, "value": value} + ) + else: + self.found_url(url, name, base, lineno, column) else: self.found_url(value, name, base, lineno, column) From 27f22ae17a97feccb39ebc34020f520cabd80fb3 Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Fri, 7 Aug 2020 20:04:23 +0100 Subject: [PATCH 2/2] Fix treating data: URIs in srcset values as links --- linkcheck/htmlutil/linkparse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linkcheck/htmlutil/linkparse.py b/linkcheck/htmlutil/linkparse.py index 682c2e20..9c262f48 100644 --- a/linkcheck/htmlutil/linkparse.py +++ b/linkcheck/htmlutil/linkparse.py @@ -199,7 +199,7 @@ class LinkFinder: elif attr == 'archive': for url in value.split(','): self.found_url(url, name, base, lineno, column) - elif attr == 'srcset': + elif attr == 'srcset' and not value.startswith('data:'): for img_candidate in value.split(','): try: url = img_candidate.split()[0]