From 1abd9ea10e421e549f235eb4c6be7143b157f197 Mon Sep 17 00:00:00 2001
From: Chris Mayo
Date: Mon, 5 Sep 2022 19:28:40 +0100
Subject: [PATCH 1/2] Skip tests in TestFile rather than silently returning

---
 tests/checker/test_file.py | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/tests/checker/test_file.py b/tests/checker/test_file.py
index 58ab5d33..272085cb 100644
--- a/tests/checker/test_file.py
+++ b/tests/checker/test_file.py
@@ -20,6 +20,8 @@ import os
 import sys
 import zipfile
 
+import pytest
+
 from tests import need_network, need_word, need_pdflib
 from . import LinkCheckTest, get_file
 
@@ -89,7 +91,7 @@ class TestFile(LinkCheckTest):
         # unpack non-unicode filename which cannot be stored
         # in the SF subversion repository
         if os.name != "posix" or sys.platform != "linux2":
-            return
+            pytest.skip("Not running on POSIX or Linux")
         dirname = get_file("dir")
         if not os.path.isdir(dirname):
             unzip(dirname + ".zip", os.path.dirname(dirname))
@@ -115,7 +117,7 @@ class TestFile(LinkCheckTest):
             # Fails on NT platforms and I am too lazy to fix
             # Cause: url get quoted %7C which gets lowercased to
             # %7c and this fails.
-            return
+            pytest.skip("Not running on NT")
         url = "file:/%(curdir)s/%(datadir)s/file.txt" % self.get_attrs()
         nurl = self.norm(url)
         resultlines = [

From 3c7fb5b57152183fc70cce62c03ce6777c45ed9c Mon Sep 17 00:00:00 2001
From: Chris Mayo
Date: Mon, 5 Sep 2022 19:28:40 +0100
Subject: [PATCH 2/2] Fix checking directory containing Unicode filenames

Non-Unicode filenames are not supported.

sys.platform has not returned "linux2" since Python 3.3.
---
 linkcheck/checker/__init__.py  |  3 +++
 linkcheck/checker/fileurl.py   |  2 +-
 tests/checker/data/udir.result | 11 +++++++++++
 tests/checker/test_file.py     | 16 +++++++++++++++-
 4 files changed, 30 insertions(+), 2 deletions(-)
 create mode 100644 tests/checker/data/udir.result

diff --git a/linkcheck/checker/__init__.py b/linkcheck/checker/__init__.py
index f2be0482..075075ce 100644
--- a/linkcheck/checker/__init__.py
+++ b/linkcheck/checker/__init__.py
@@ -188,6 +188,9 @@ def get_index_html(urls):
         name = html.escape(entry)
         try:
             url = html.escape(urllib.parse.quote(entry))
+        except UnicodeEncodeError:
+            log.warn(LOG_CHECK, "Unable to convert entry to Unicode")
+            continue
         except KeyError:
             # Some unicode entries raise KeyError.
             url = name
diff --git a/linkcheck/checker/fileurl.py b/linkcheck/checker/fileurl.py
index b72a8bc5..59820060 100644
--- a/linkcheck/checker/fileurl.py
+++ b/linkcheck/checker/fileurl.py
@@ -234,7 +234,7 @@ class FileUrl(urlbase.UrlBase):
         with links to the files."""
         if self.is_directory():
             data = get_index_html(get_files(self.get_os_filename()))
-            data = data.encode("iso8859-1", "ignore")
+            data = data.encode()
         else:
             data = super().read_content()
         return data
diff --git a/tests/checker/data/udir.result b/tests/checker/data/udir.result
new file mode 100644
index 00000000..c1504b4a
--- /dev/null
+++ b/tests/checker/data/udir.result
@@ -0,0 +1,11 @@
+url file://%(curdir)s/%(datadir)s/udir/
+cache key file://%(curdir)s/%(datadir)s/udir/
+real url file://%(curdir)s/%(datadir)s/udir/
+name %(datadir)s/udir
+valid
+
+url %%C3%%AD%%C2%%BB%%C2%%AD%%C2%%AF%%C2%%BF.dat
+cache key file://%(curdir)s/%(datadir)s/udir/%%C3%%AD%%C2%%BB%%C2%%AD%%C2%%AF%%C2%%BF.dat
+real url file://%(curdir)s/%(datadir)s/udir/%%C3%%AD%%C2%%BB%%C2%%AD%%C2%%AF%%C2%%BF.dat
+name í»­¯¿.dat
+valid
diff --git a/tests/checker/test_file.py b/tests/checker/test_file.py
index 272085cb..81b53f16 100644
--- a/tests/checker/test_file.py
+++ b/tests/checker/test_file.py
@@ -17,6 +17,7 @@
 Test file parsing.
 """
 import os
+from pathlib import Path
 import sys
 import zipfile
 
@@ -28,6 +29,10 @@ from . import LinkCheckTest, get_file
 
 def unzip(filename, targetdir):
     """Unzip given zipfile into targetdir."""
+    # There are likely problems with zipfile and non-Unicode filenames
+    # https://github.com/python/cpython/issues/83042
+    # https://github.com/python/cpython/issues/72267
+    # https://github.com/python/cpython/issues/95463
     zf = zipfile.ZipFile(filename)
     for name in zf.namelist():
         if name.endswith("/"):
@@ -87,16 +92,25 @@ class TestFile(LinkCheckTest):
     def test_urllist(self):
         self.file_test("urllist.txt")
 
+    @pytest.mark.xfail(strict=True)
     def test_directory_listing(self):
         # unpack non-unicode filename which cannot be stored
         # in the SF subversion repository
-        if os.name != "posix" or sys.platform != "linux2":
+        if os.name != "posix" or sys.platform != "linux":
             pytest.skip("Not running on POSIX or Linux")
         dirname = get_file("dir")
         if not os.path.isdir(dirname):
             unzip(dirname + ".zip", os.path.dirname(dirname))
         self.file_test("dir")
 
+    def test_directory_listing_unicode(self):
+        if os.name != "posix" or sys.platform != "linux":
+            pytest.skip("Not running on POSIX or Linux")
+        dirname = Path(get_file("udir"))
+        dirname.mkdir(exist_ok=True)
+        Path(dirname, "í»­¯¿.dat").touch()
+        self.file_test("udir")
+
     def test_unicode_filename(self):
         # a unicode filename
         self.file_test("Мошкова.bin")