mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-26 18:50:32 +00:00
Merge pull request #646 from cjmayo/unidir
Fix checking directory containing Unicode filenames
This commit is contained in:
commit
595ce32e55
4 changed files with 34 additions and 4 deletions
|
|
@ -188,6 +188,9 @@ def get_index_html(urls):
|
|||
name = html.escape(entry)
|
||||
try:
|
||||
url = html.escape(urllib.parse.quote(entry))
|
||||
except UnicodeEncodeError:
|
||||
log.warn(LOG_CHECK, "Unable to convert entry to Unicode")
|
||||
continue
|
||||
except KeyError:
|
||||
# Some unicode entries raise KeyError.
|
||||
url = name
|
||||
|
|
|
|||
|
|
@ -234,7 +234,7 @@ class FileUrl(urlbase.UrlBase):
|
|||
with links to the files."""
|
||||
if self.is_directory():
|
||||
data = get_index_html(get_files(self.get_os_filename()))
|
||||
data = data.encode("iso8859-1", "ignore")
|
||||
data = data.encode()
|
||||
else:
|
||||
data = super().read_content()
|
||||
return data
|
||||
|
|
|
|||
11
tests/checker/data/udir.result
Normal file
11
tests/checker/data/udir.result
Normal file
|
|
@ -0,0 +1,11 @@
|
|||
url file://%(curdir)s/%(datadir)s/udir/
|
||||
cache key file://%(curdir)s/%(datadir)s/udir/
|
||||
real url file://%(curdir)s/%(datadir)s/udir/
|
||||
name %(datadir)s/udir
|
||||
valid
|
||||
|
||||
url %%C3%%AD%%C2%%BB%%C2%%AD%%C2%%AF%%C2%%BF.dat
|
||||
cache key file://%(curdir)s/%(datadir)s/udir/%%C3%%AD%%C2%%BB%%C2%%AD%%C2%%AF%%C2%%BF.dat
|
||||
real url file://%(curdir)s/%(datadir)s/udir/%%C3%%AD%%C2%%BB%%C2%%AD%%C2%%AF%%C2%%BF.dat
|
||||
name í»¯¿.dat
|
||||
valid
|
||||
|
|
@ -17,15 +17,22 @@
|
|||
Test file parsing.
|
||||
"""
|
||||
import os
|
||||
from pathlib import Path
|
||||
import sys
|
||||
import zipfile
|
||||
|
||||
import pytest
|
||||
|
||||
from tests import need_network, need_word, need_pdflib
|
||||
from . import LinkCheckTest, get_file
|
||||
|
||||
|
||||
def unzip(filename, targetdir):
|
||||
"""Unzip given zipfile into targetdir."""
|
||||
# There are likely problems with zipfile and non-Unicode filenames
|
||||
# https://github.com/python/cpython/issues/83042
|
||||
# https://github.com/python/cpython/issues/72267
|
||||
# https://github.com/python/cpython/issues/95463
|
||||
zf = zipfile.ZipFile(filename)
|
||||
for name in zf.namelist():
|
||||
if name.endswith("/"):
|
||||
|
|
@ -85,16 +92,25 @@ class TestFile(LinkCheckTest):
|
|||
def test_urllist(self):
|
||||
self.file_test("urllist.txt")
|
||||
|
||||
@pytest.mark.xfail(strict=True)
|
||||
def test_directory_listing(self):
|
||||
# unpack non-unicode filename which cannot be stored
|
||||
# in the SF subversion repository
|
||||
if os.name != "posix" or sys.platform != "linux2":
|
||||
return
|
||||
if os.name != "posix" or sys.platform != "linux":
|
||||
pytest.skip("Not running on POSIX or Linux")
|
||||
dirname = get_file("dir")
|
||||
if not os.path.isdir(dirname):
|
||||
unzip(dirname + ".zip", os.path.dirname(dirname))
|
||||
self.file_test("dir")
|
||||
|
||||
def test_directory_listing_unicode(self):
|
||||
if os.name != "posix" or sys.platform != "linux":
|
||||
pytest.skip("Not running on POSIX or Linux")
|
||||
dirname = Path(get_file("udir"))
|
||||
dirname.mkdir(exist_ok=True)
|
||||
Path(dirname, "í»¯¿.dat").touch()
|
||||
self.file_test("udir")
|
||||
|
||||
def test_unicode_filename(self):
|
||||
# a unicode filename
|
||||
self.file_test("Мошкова.bin")
|
||||
|
|
@ -115,7 +131,7 @@ class TestFile(LinkCheckTest):
|
|||
# Fails on NT platforms and I am too lazy to fix
|
||||
# Cause: url get quoted %7C which gets lowercased to
|
||||
# %7c and this fails.
|
||||
return
|
||||
pytest.skip("Not running on NT")
|
||||
url = "file:/%(curdir)s/%(datadir)s/file.txt" % self.get_attrs()
|
||||
nurl = self.norm(url)
|
||||
resultlines = [
|
||||
|
|
|
|||
Loading…
Reference in a new issue