mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-14 03:11:03 +00:00
Improved duplication url check.
This commit is contained in:
parent
b758fc6f52
commit
c4e15c7b88
2 changed files with 18 additions and 2 deletions
|
|
@ -601,7 +601,13 @@ def is_duplicate_content_url(url1, url2):
|
|||
if url1 == url2:
|
||||
return True
|
||||
if url2 in url1:
|
||||
return shorten_duplicate_content_url(url1) == url2
|
||||
url1 = shorten_duplicate_content_url(url1)
|
||||
if not url2.endswith('/') and url1.endswith('/'):
|
||||
url2 += '/'
|
||||
return url1 == url2
|
||||
if url1 in url2:
|
||||
return shorten_duplicate_content_url(url2) == url1
|
||||
url2 = shorten_duplicate_content_url(url2)
|
||||
if not url1.endswith('/') and url2.endswith('/'):
|
||||
url1 += '/'
|
||||
return url1 == url2
|
||||
return False
|
||||
|
|
|
|||
|
|
@ -554,3 +554,13 @@ class TestUrl (unittest.TestCase):
|
|||
@need_network
|
||||
def test_get_content (self):
|
||||
linkcheck.url.get_content('http://www.debian.org/')
|
||||
|
||||
def test_duplicate_urls(self):
    # Each pair is handed to is_duplicate_content_url in the listed
    # argument order and must be reported as duplicate content.
    check = linkcheck.url.is_duplicate_content_url
    url_pairs = (
        # identical URLs
        ("http://example.org", "http://example.org"),
        # trailing slash on one side only
        ("http://example.org/", "http://example.org"),
        ("http://example.org", "http://example.org/"),
        # index.html on one side only
        ("http://example.org/index.html", "http://example.org"),
        ("http://example.org", "http://example.org/index.html"),
        # index.htm on one side only
        ("http://example.org/index.htm", "http://example.org"),
        ("http://example.org", "http://example.org/index.htm"),
    )
    for first, second in url_pairs:
        self.assertTrue(check(first, second))
|
||||
|
|
|
|||
Loading…
Reference in a new issue