2014-01-08 21:33:04 +00:00
|
|
|
# Copyright (C) 2004-2014 Bastian Kleineidam
|
2005-12-07 21:55:16 +00:00
|
|
|
#
|
|
|
|
|
# This program is free software; you can redistribute it and/or modify
|
|
|
|
|
# it under the terms of the GNU General Public License as published by
|
|
|
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
|
|
|
# (at your option) any later version.
|
|
|
|
|
#
|
|
|
|
|
# This program is distributed in the hope that it will be useful,
|
|
|
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
|
|
|
# GNU General Public License for more details.
|
|
|
|
|
#
|
2009-07-24 21:58:20 +00:00
|
|
|
# You should have received a copy of the GNU General Public License along
|
|
|
|
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
2005-12-07 21:55:16 +00:00
|
|
|
"""
|
|
|
|
|
Test file parsing.
|
|
|
|
|
"""
|
|
|
|
|
import os
|
2011-12-18 07:12:23 +00:00
|
|
|
import sys
|
2008-11-27 19:23:40 +00:00
|
|
|
import zipfile
|
2017-02-01 16:45:05 +00:00
|
|
|
|
|
|
|
|
import pytest
|
|
|
|
|
|
2020-05-25 18:55:28 +00:00
|
|
|
from tests import need_network, need_word, need_pdflib
|
2008-11-27 19:23:40 +00:00
|
|
|
from . import LinkCheckTest, get_file
|
|
|
|
|
|
|
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def unzip(filename, targetdir):
    """Unzip given zipfile into targetdir.

    Archive entries whose name ends with "/" are created as directories
    (mode 0o700); every other entry is written out as a binary file.
    Missing parent directories are created on demand and directories that
    already exist are left untouched, so extracting twice is harmless.

    @param filename: path of the zip archive to read
    @param targetdir: directory below which the entries are extracted
    """
    # The context manager closes the archive even when extraction fails.
    with zipfile.ZipFile(filename) as zf:
        for name in zf.namelist():
            path = os.path.join(targetdir, name)
            if name.endswith("/"):
                os.makedirs(path, 0o700, exist_ok=True)
                continue
            # Archives need not contain explicit entries for the parent
            # directories of a file, so make sure the parent exists.
            parent = os.path.dirname(path)
            if parent:
                os.makedirs(parent, 0o700, exist_ok=True)
            with open(path, "wb") as outfile:
                outfile.write(zf.read(name))
|
2005-12-07 21:55:16 +00:00
|
|
|
|
|
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
class TestFile(LinkCheckTest):
|
2005-12-07 21:55:16 +00:00
|
|
|
"""
|
|
|
|
|
Test file:// link checking (and file content parsing).
|
|
|
|
|
"""
|
|
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def test_html(self):
    """Run file_test on the HTML sample ``file.html``."""
    filename = "file.html"
    self.file_test(filename)
|
|
|
|
|
|
2020-05-25 18:55:28 +00:00
|
|
|
@need_network
def test_html_url_quote(self):
    """Run file_test on ``file_url_quote.html`` (guarded by need_network)."""
    filename = "file_url_quote.html"
    self.file_test(filename)
|
|
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def test_wml(self):
    """Run file_test on the WML sample ``file.wml``."""
    filename = "file.wml"
    self.file_test(filename)
|
|
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def test_text(self):
    """Run file_test on the plain text sample ``file.txt``."""
    filename = "file.txt"
    self.file_test(filename)
|
|
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def test_asc(self):
    """Run file_test on the sample ``file.asc``."""
    filename = "file.asc"
    self.file_test(filename)
|
|
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def test_css(self):
    """Run file_test on the stylesheet sample ``file.css``."""
    filename = "file.css"
    self.file_test(filename)
|
|
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def test_php(self):
    """Run file_test on the PHP sample ``file.php``."""
    filename = "file.php"
    self.file_test(filename)
|
|
|
|
|
|
2020-05-21 16:01:33 +00:00
|
|
|
def test_empty(self):
    """Run file_test on ``empty.html``."""
    filename = "empty.html"
    self.file_test(filename)
|
|
|
|
|
|
2013-01-09 22:02:47 +00:00
|
|
|
@need_word
def test_word(self):
    """Run file_test on ``file.doc`` with the WordParser plugin enabled."""
    self.file_test("file.doc", confargs={"enabledplugins": ["WordParser"]})
|
|
|
|
|
|
|
|
|
|
@need_pdflib
def test_pdf(self):
    """Run file_test on ``file.pdf`` with the PdfParser plugin enabled."""
    self.file_test("file.pdf", confargs={"enabledplugins": ["PdfParser"]})
|
2013-01-09 22:02:47 +00:00
|
|
|
|
2014-11-11 13:35:18 +00:00
|
|
|
def test_markdown(self):
    """Run file_test on ``file.markdown`` with MarkdownCheck enabled."""
    self.file_test(
        "file.markdown",
        confargs={"enabledplugins": ["MarkdownCheck"]},
    )
|
|
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def test_urllist(self):
    """Run file_test on the URL list sample ``urllist.txt``."""
    filename = "urllist.txt"
    self.file_test(filename)
|
|
|
|
|
|
2017-02-01 16:45:05 +00:00
|
|
|
@pytest.mark.xfail
def test_directory_listing(self):
    """Run file_test on the ``dir`` test directory (Linux only).

    The directory is unpacked on demand from the ``.zip`` archive of the
    same name that sits next to it. On any other platform the test is
    reported as skipped.
    """
    # sys.platform is "linux" on Python >= 3.3 and was "linux2" before,
    # so match by prefix; comparing against "linux2" alone would disable
    # this test on every current interpreter.
    if os.name != "posix" or not sys.platform.startswith("linux"):
        pytest.skip("directory listing test only runs on Linux")
    # unpack non-unicode filename which cannot be stored
    # in the SF subversion repository
    dirname = get_file("dir")
    if not os.path.isdir(dirname):
        unzip(dirname + ".zip", os.path.dirname(dirname))
    self.file_test("dir")
|
2008-11-20 18:48:23 +00:00
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def test_unicode_filename(self):
    """Run file_test on a file whose name contains non-ASCII characters."""
    # a unicode filename
    filename = "Мошкова.bin"
    self.file_test(filename)
|
2009-11-28 11:04:02 +00:00
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def test_good_file(self):
    """Check that a well-formed ``file://`` URL to file.txt is valid.

    The cache key and real URL are expected to equal the normalized URL.
    """
    attrs = self.get_attrs()
    url = "file://%(curdir)s/%(datadir)s/file.txt" % attrs
    normalized = self.norm(url)
    expected = ["url %s" % url]
    expected += [fmt % normalized for fmt in ("cache key %s", "real url %s")]
    expected.append("valid")
    self.direct(url, expected)
|
2006-05-29 13:54:40 +00:00
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def test_bad_file(self):
    """Check that a ``file:/`` URL (single slash) is reported as an error.

    Nothing is checked when os.name is "nt".
    """
    on_windows = os.name == "nt"
    if on_windows:
        # Fails on NT platforms and I am too lazy to fix
        # Cause: url get quoted %7C which gets lowercased to
        # %7c and this fails.
        return
    attrs = self.get_attrs()
    url = "file:/%(curdir)s/%(datadir)s/file.txt" % attrs
    normalized = self.norm(url)
    expected = ["url %s" % url]
    expected += [fmt % normalized for fmt in ("cache key %s", "real url %s")]
    expected.append("error")
    self.direct(url, expected)
|
2006-05-29 13:54:40 +00:00
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def test_good_file_missing_dslash(self):
    """Check that a ``file:`` URL lacking the double slash is still valid.

    The expected cache key and real URL carry the ``file://`` prefix.
    """
    # good file (missing double slash)
    attrs = self.get_attrs()
    url = "file:%(curdir)s/%(datadir)s/file.txt" % attrs
    expected = ["url %s" % url]
    for template in (
        "cache key file://%(curdir)s/%(datadir)s/file.txt",
        "real url file://%(curdir)s/%(datadir)s/file.txt",
    ):
        expected.append(template % attrs)
    expected.append("valid")
    self.direct(url, expected)
|
2006-05-29 13:54:40 +00:00
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def test_good_dir(self):
    """Check that a ``file://`` URL pointing at the data directory is valid.

    URL, cache key and real URL are all expected to be the same string.
    """
    attrs = self.get_attrs()
    url = "file://%(curdir)s/%(datadir)s/" % attrs
    expected = [fmt % url for fmt in ("url %s", "cache key %s", "real url %s")]
    expected.append("valid")
    self.direct(url, expected)
|
2010-12-21 06:05:12 +00:00
|
|
|
|
2020-05-16 19:19:42 +00:00
|
|
|
def test_good_dir_space(self):
    """Check a directory whose name contains a space, with recursion.

    The directory ``a b/`` itself and its two entries ``el.html`` and
    ``t.txt`` are all expected to be reported as valid.
    """
    templates = (
        "file://%(curdir)s/%(datadir)s/a b/",
        "file://%(curdir)s/%(datadir)s/a b/el.html",
        "file://%(curdir)s/%(datadir)s/a b/t.txt",
    )
    # Build each raw URL and normalize it right away, one after another.
    pairs = []
    for template in templates:
        raw = template % self.get_attrs()
        pairs.append((raw, self.norm(raw)))
    (url, dir_key), (_, html_key), (_, text_key) = pairs

    expected = [
        "url %s" % url,
        "cache key %s" % dir_key,
        "real url %s" % dir_key,
        "valid",
    ]
    for entry, key in (("el.html", html_key), ("t.txt", text_key)):
        expected.extend(
            [
                "url %s" % entry,
                "cache key %s" % key,
                "real url %s" % key,
                "name %s" % entry,
                "valid",
            ]
        )
    self.direct(url, expected, recursionlevel=2)
|