Don't guess the URL for files that end in .html

Fixes:
`linkchecker ftp.html`
failing because it looks for ftp://ftp.html
This commit is contained in:
Chris Mayo 2021-12-13 19:31:13 +00:00
parent 7f175c13d4
commit 76815bcf47
2 changed files with 39 additions and 0 deletions

View file

@ -35,6 +35,8 @@ def guess_url(url):
a http respective ftp URL.
@rtype: unicode
"""
if url.lower().endswith(".html") and "/" not in url:
return url
if url.lower().startswith("www."):
# syntactic sugar
return "http://%s" % url

View file

@ -0,0 +1,37 @@
# Copyright (C) 2021 Chris Mayo
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
Test guess_url.
"""
import unittest
from linkcheck.checker import guess_url
class TestGuestUrl(unittest.TestCase):
    """
    Test guess_url.

    Covers host-like names that are expected to gain a scheme and bare
    ``.html`` file names that are expected to be returned unchanged.
    """

    def test_guess_url(self):
        """Compare the guess_url() result with the expected value per input."""
        # (input, expected result) pairs.
        cases = (
            ("www.example.com", "http://www.example.com"),
            ("ftp.example.com", "ftp://ftp.example.com"),
            # Bare .html file names must come back untouched.
            ("ftp.html", "ftp.html"),
            ("www.html", "www.html"),
        )
        for url, expected in cases:
            # subTest keeps going after a failure, so every broken input
            # is reported rather than only the first one.
            with self.subTest(url=url):
                self.assertEqual(guess_url(url), expected)