# Copyright (C) 2004-2014 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
Test file parsing.
"""
import os
import sys
import zipfile

import pytest

from tests import need_word, need_pdflib
from . import LinkCheckTest, get_file


def unzip(filename, targetdir):
    """Unzip given zipfile into targetdir.

    Archive members whose name ends with '/' are created as directories
    with mode 0o700; every other member is written as a binary file.
    Members are processed in archive order, so a directory entry has to
    precede the files stored below it.

    @param filename: path of the zip archive to read
    @param targetdir: existing directory to extract into; may be given
        as str, bytes or a path-like object
    """
    # Zip member names are str, so make sure the target directory is str
    # too before joining them.  (The former ``unicode`` check raised a
    # NameError on Python 3.)
    targetdir = os.fsdecode(targetdir)
    with zipfile.ZipFile(filename) as zf:
        for name in zf.namelist():
            target = os.path.join(targetdir, name)
            if name.endswith('/'):
                os.mkdir(target, 0o700)
            else:
                with open(target, 'wb') as outfile:
                    outfile.write(zf.read(name))


class TestFile(LinkCheckTest):
    """
    Test file:// link checking (and file content parsing).
    """

    # The tests below hand a data file name to the inherited file_test()
    # helper; plugin-dependent formats enable their parser plugin through
    # confargs.

    def test_html(self):
        self.file_test("file.html")

    def test_html_url_quote(self):
        self.file_test("file_url_quote.html")

    def test_wml(self):
        self.file_test("file.wml")

    def test_text(self):
        self.file_test("file.txt")

    def test_asc(self):
        self.file_test("file.asc")

    def test_css(self):
        self.file_test("file.css")

    def test_php(self):
        self.file_test("file.php")

    @need_word
    def test_word(self):
        confargs = dict(enabledplugins=["WordParser"])
        self.file_test("file.doc", confargs=confargs)

    @need_pdflib
    def test_pdf(self):
        confargs = dict(enabledplugins=["PdfParser"])
        self.file_test("file.pdf", confargs=confargs)

    def test_markdown(self):
        confargs = dict(enabledplugins=["MarkdownCheck"])
        self.file_test("file.markdown", confargs=confargs)

    def test_urllist(self):
        self.file_test("urllist.txt")

    @pytest.mark.xfail
    def test_directory_listing(self):
        # unpack non-unicode filename which cannot be stored
        # in the SF subversion repository
        # sys.platform is 'linux2' on Python 2 and 'linux' since
        # Python 3.3, so compare by prefix; an exact 'linux2' match
        # would skip this test on every Python 3 interpreter.
        if os.name != 'posix' or not sys.platform.startswith('linux'):
            return
        dirname = get_file("dir")
        if not os.path.isdir(dirname):
            # The directory is shipped as "<dirname>.zip" next to it.
            unzip(dirname + ".zip", os.path.dirname(dirname))
        self.file_test("dir")

    def test_unicode_filename(self):
        # a unicode filename
        self.file_test("Мошкова.bin")

    def test_good_file(self):
        url = "file://%(curdir)s/%(datadir)s/file.txt" % self.get_attrs()
        nurl = self.norm(url)
        resultlines = [
            "url %s" % url,
            "cache key %s" % nurl,
            "real url %s" % nurl,
            "valid",
        ]
        self.direct(url, resultlines)

    def test_bad_file(self):
        if os.name == 'nt':
            # Fails on NT platforms and I am too lazy to fix
            # Cause: the url gets quoted to %7C which gets lowercased
            # to %7c and this fails.
            return
        # Only a single slash after the scheme; an "error" result is
        # expected.
        url = "file:/%(curdir)s/%(datadir)s/file.txt" % self.get_attrs()
        nurl = self.norm(url)
        resultlines = [
            "url %s" % url,
            "cache key %s" % nurl,
            "real url %s" % nurl,
            "error",
        ]
        self.direct(url, resultlines)

    def test_good_file_missing_dslash(self):
        # good file (missing double slash)
        attrs = self.get_attrs()
        url = "file:%(curdir)s/%(datadir)s/file.txt" % attrs
        resultlines = [
            "url %s" % url,
            "cache key file://%(curdir)s/%(datadir)s/file.txt" % attrs,
            "real url file://%(curdir)s/%(datadir)s/file.txt" % attrs,
            "valid",
        ]
        self.direct(url, resultlines)

    def test_good_dir(self):
        url = "file://%(curdir)s/%(datadir)s/" % self.get_attrs()
        resultlines = [
            "url %s" % url,
            "cache key %s" % url,
            "real url %s" % url,
            "valid",
        ]
        self.direct(url, resultlines)

    def test_good_dir_space(self):
        # Directory whose name contains a space, plus the two entries
        # (el.html, t.txt) expected to be reported from within it.
        attrs = self.get_attrs()
        url = "file://%(curdir)s/%(datadir)s/a b/" % attrs
        nurl = self.norm(url)
        url2 = "file://%(curdir)s/%(datadir)s/a b/el.html" % attrs
        nurl2 = self.norm(url2)
        url3 = "file://%(curdir)s/%(datadir)s/a b/t.txt" % attrs
        nurl3 = self.norm(url3)
        resultlines = [
            "url %s" % url,
            "cache key %s" % nurl,
            "real url %s" % nurl,
            "valid",
            "url el.html",
            "cache key %s" % nurl2,
            "real url %s" % nurl2,
            "name el.html",
            "valid",
            "url t.txt",
            "cache key %s" % nurl3,
            "real url %s" % nurl3,
            "name t.txt",
            "valid",
        ]
        self.direct(url, resultlines, recursionlevel=2)