From 76815bcf478f06bb98bdeec1fd7be208e08b41f2 Mon Sep 17 00:00:00 2001
From: Chris Mayo
Date: Mon, 13 Dec 2021 19:31:13 +0000
Subject: [PATCH] Don't guess the URL for files that end in .html

Fixes: linkchecker ftp.html failing looking for ftp://ftp.html
---
Review note: the new test file was extended during review to cover the
"/" guard and upper-case file names, so the blob id on its index line
predates the amended content.

 linkcheck/checker/__init__.py   |  2 ++
 tests/checker/test_guess_url.py | 45 +++++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+)
 create mode 100644 tests/checker/test_guess_url.py

diff --git a/linkcheck/checker/__init__.py b/linkcheck/checker/__init__.py
index a8db1326..952f6997 100644
--- a/linkcheck/checker/__init__.py
+++ b/linkcheck/checker/__init__.py
@@ -35,6 +35,8 @@ def guess_url(url):
       a http respective ftp URL.
     @rtype: unicode
     """
+    if url.lower().endswith(".html") and "/" not in url:
+        return url
     if url.lower().startswith("www."):
         # syntactic sugar
         return "http://%s" % url
diff --git a/tests/checker/test_guess_url.py b/tests/checker/test_guess_url.py
new file mode 100644
index 00000000..f04d820e
--- /dev/null
+++ b/tests/checker/test_guess_url.py
@@ -0,0 +1,45 @@
+# Copyright (C) 2021 Chris Mayo
+#
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2 of the License, or
+# (at your option) any later version.
+#
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+"""
+Test guess_url.
+"""
+
+import unittest
+from linkcheck.checker import guess_url
+
+
+class TestGuestUrl(unittest.TestCase):
+    """
+    Test guess_url.
+    """
+
+    def test_guess_url(self):
+        """Names starting with www. or ftp. get a scheme prepended."""
+        url = "www.example.com"
+        self.assertEqual(guess_url(url), f"http://{url}")
+        url = "ftp.example.com"
+        self.assertEqual(guess_url(url), f"ftp://{url}")
+
+    def test_guess_url_html_file(self):
+        """Bare file names ending in .html are returned unchanged."""
+        for url in ("ftp.html", "www.html", "FTP.HTML"):
+            with self.subTest(url=url):
+                self.assertEqual(guess_url(url), url)
+
+    def test_guess_url_html_path(self):
+        """A .html URL that contains a slash is still guessed."""
+        url = "www.example.com/index.html"
+        self.assertEqual(guess_url(url), f"http://{url}")