diff --git a/linkcheck/url.py b/linkcheck/url.py index 46e1efd3..fa16bde4 100644 --- a/linkcheck/url.py +++ b/linkcheck/url.py @@ -601,7 +601,13 @@ def is_duplicate_content_url(url1, url2): if url1 == url2: return True if url2 in url1: - return shorten_duplicate_content_url(url1) == url2 + url1 = shorten_duplicate_content_url(url1) + if not url2.endswith('/') and url1.endswith('/'): + url2 += '/' + return url1 == url2 if url1 in url2: - return shorten_duplicate_content_url(url2) == url1 + url2 = shorten_duplicate_content_url(url2) + if not url1.endswith('/') and url2.endswith('/'): + url1 += '/' + return url1 == url2 return False diff --git a/tests/test_url.py b/tests/test_url.py index 0c07a004..fdb663c9 100644 --- a/tests/test_url.py +++ b/tests/test_url.py @@ -554,3 +554,13 @@ class TestUrl (unittest.TestCase): @need_network def test_get_content (self): linkcheck.url.get_content('http://www.debian.org/') + + def test_duplicate_urls(self): + is_dup = linkcheck.url.is_duplicate_content_url + self.assertTrue(is_dup("http://example.org", "http://example.org")) + self.assertTrue(is_dup("http://example.org/", "http://example.org")) + self.assertTrue(is_dup("http://example.org", "http://example.org/")) + self.assertTrue(is_dup("http://example.org/index.html", "http://example.org")) + self.assertTrue(is_dup("http://example.org", "http://example.org/index.html")) + self.assertTrue(is_dup("http://example.org/index.htm", "http://example.org")) + self.assertTrue(is_dup("http://example.org", "http://example.org/index.htm"))