linkchecker/linkcheck/fileutil.py
2014-03-05 19:26:37 +01:00

308 lines
9.7 KiB
Python

# -*- coding: iso-8859-1 -*-
# Copyright (C) 2005-2014 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""
File and path utilities.
"""
import os
import re
import locale
import stat
import fnmatch
import mimetypes
import tempfile
import importlib
from distutils.spawn import find_executable
from .decorators import memoized
from . import log, LOG_CHECK
def write_file (filename, content, backup=False, callback=None):
    """Overwrite a possibly existing file with new content. Do this
    in a manner that does not leave truncated or broken files behind.

    The content is written to C{filename + ".tmp"} first. An existing
    target is then renamed to C{filename + ".bak"} and the temporary
    file is renamed into place. If writing fails, the original file is
    left untouched.

    @param filename: name of file to write
    @type filename: string
    @param content: file content to write (the file is opened in
      binary mode)
    @type content: string
    @param backup: if backup file should be left
    @type backup: bool
    @param callback: non-default storage function
    @type callback: None or function taking two parameters (fileobj, content)
    """
    tmpname = filename + ".tmp"
    bakname = filename + ".bak"
    # First write into a temp file. open() is used instead of the
    # Python 2 only file() builtin, and the with statement closes the
    # handle even when the write or the callback raises.
    with open(tmpname, 'wb') as f:
        if callback is None:
            f.write(content)
        else:
            callback(f, content)
    # move orig file to backup
    if os.path.exists(filename):
        os.rename(filename, bakname)
    # move temp file to orig
    os.rename(tmpname, filename)
    # remove backup
    if not backup and os.path.exists(bakname):
        os.remove(bakname)
def has_module (name, without_error=True):
    """Check whether the named module can be imported.

    @param name: dotted module name handed to importlib.import_module
    @param without_error: True if the module must import cleanly; when
      False, a module that raises something other than ImportError
      while importing still counts as available
    @return: flag if import is successful
    @rtype: bool
    """
    try:
        importlib.import_module(name)
    except ImportError:
        # module is not installed or not importable
        return False
    except Exception:
        # some modules raise errors when initializing
        return not without_error
    else:
        return True
class GlobDirectoryWalker (object):
    """A forward iterator that traverses a directory tree.

    Iteration relies on the sequence protocol: __getitem__ is called
    repeatedly and the IndexError raised by popping from the empty
    directory stack ends the loop.
    """

    def __init__ (self, directory, pattern="*"):
        """Remember the start directory and the fnmatch pattern."""
        # directories that still have to be listed
        self.stack = [directory]
        self.pattern = pattern
        # entries of the directory currently being walked
        self.files = []
        # position of the next entry in self.files
        self.index = 0

    def __getitem__ (self, index):
        """Return the next matching path; the index argument is ignored."""
        while True:
            if self.index >= len(self.files):
                # Current listing is exhausted: continue with the next
                # directory. pop() raises IndexError on an empty stack,
                # which stops the iteration.
                self.directory = self.stack.pop()
                self.files = os.listdir(self.directory)
                self.index = 0
                continue
            entry = self.files[self.index]
            self.index += 1
            fullname = os.path.join(self.directory, entry)
            # descend into real subdirectories, but do not follow links
            if os.path.isdir(fullname) and not os.path.islink(fullname):
                self.stack.append(fullname)
            if fnmatch.fnmatch(entry, self.pattern):
                return fullname
# Short alias: rglob(directory, pattern) creates a GlobDirectoryWalker.
rglob = GlobDirectoryWalker
class Buffer (object):
    """Collects written chunks and hands them out again on flush."""

    def __init__ (self, empty=''):
        """Create an empty buffer.

        @param empty: the empty value of the data type being buffered;
          it is used both as join separator and as the reset value
        """
        self.empty = empty
        self.buf = empty
        # chunks written since the last flush
        self.tmpbuf = []
        # total length of all data written so far (not reset by flush)
        self.pos = 0

    def __len__ (self):
        """Return the total length of all data written so far."""
        return self.pos

    def write (self, data):
        """Queue a chunk of data."""
        self.tmpbuf.append(data)
        self.pos += len(data)

    def flush (self, overlap=0):
        """Return the buffered data and remove it from the buffer.

        @param overlap: if non-zero and smaller than the total length
          written so far, this many trailing items are kept in the
          buffer instead of being returned
        """
        self.buf += self.empty.join(self.tmpbuf)
        self.tmpbuf = []
        keep_tail = bool(overlap) and overlap < self.pos
        if not keep_tail:
            flushed, self.buf = self.buf, self.empty
            return flushed
        flushed = self.buf[:-overlap]
        self.buf = self.buf[-overlap:]
        return flushed
def get_mtime (filename):
    """Return the modification time of filename, or zero if it cannot
    be determined."""
    try:
        mtime = os.path.getmtime(filename)
    except OSError:
        # os.error is an alias of OSError
        mtime = 0
    return mtime
def get_size (filename):
    """Return the size of filename in bytes, or -1 if it cannot be
    determined."""
    try:
        size = os.path.getsize(filename)
    except OSError:
        # os.error is an alias of OSError
        size = -1
    return size
# Select the file system encoding from the GLib environment variables,
# see http://developer.gnome.org/doc/API/2.0/glib/glib-running.html
# The default is UTF-8.
FSCODING = "utf-8"
if "G_FILENAME_ENCODING" in os.environ:
    # only the first entry of the comma separated list is used
    FSCODING = os.environ["G_FILENAME_ENCODING"].partition(",")[0]
    if FSCODING == "@locale":
        FSCODING = locale.getpreferredencoding()
elif "G_BROKEN_FILENAMES" in os.environ:
    FSCODING = locale.getpreferredencoding()
def pathencode (path):
    """Return path encoded with FSCODING if it is a unicode string and
    the platform does not support unicode file names; otherwise return
    it unchanged."""
    # unicode is the Python 2 text type
    needs_encoding = (isinstance(path, unicode) and
                      not os.path.supports_unicode_filenames)
    if not needs_encoding:
        return path
    # characters that cannot be encoded are replaced, not raised
    return path.encode(FSCODING, "replace")
# cache for modified check {absolute filename -> mtime seen at last check}
_mtime_cache = {}


def has_changed (filename):
    """Check if filename has changed since the last check. If this
    is the first check, assume the file is changed.

    The modification time seen by each call is stored, so a change is
    reported once and not again by every later call.

    @param filename: name of the file to check; it is made absolute
      before it is used as cache key
    @return: True on the first check, or if the modification time is
      newer than the one seen at the previous check
    @rtype: bool
    """
    key = os.path.abspath(filename)
    # get_mtime() returns zero if the time cannot be determined
    mtime = get_mtime(key)
    if key in _mtime_cache:
        changed = mtime > _mtime_cache[key]
    else:
        changed = True
    # Remember what was seen now; without this a file that changed once
    # would be reported as changed forever.
    _mtime_cache[key] = mtime
    return changed
# MIME database of this module; set by init_mimedb()
mimedb = None


def init_mimedb():
    """Create the module MIME database and register additional types.

    If the database cannot be created, the error is logged and mimedb
    is left as it was.
    """
    global mimedb
    try:
        mimedb = mimetypes.MimeTypes(strict=False)
    except StandardError as msg:
        log.error(LOG_CHECK, "could not initialize MIME database: %s" % msg)
        return
    extra_types = (
        # For Opera bookmark files (opera6.adr)
        ('text/plain', '.adr'),
        # To recognize PHP files as HTML with content check.
        ('application/x-httpd-php', '.php'),
        # To recognize WML files
        ('text/vnd.wap.wml', '.wml'),
    )
    for mimetype, extension in extra_types:
        add_mimetype(mimedb, mimetype, extension)
def add_mimetype(mimedb, mimetype, extension):
    """Register mimetype for extension in mimedb, replacing an existing
    entry.

    @param mimedb: the mimetypes.MimeTypes instance to modify
    @param mimetype: MIME type to register
    @param extension: file extension including the leading dot
    """
    # An extension already in the strict (standard) map has to be
    # replaced there; everything else goes into the non-strict map.
    is_standard = extension in mimedb.types_map[True]
    mimedb.add_type(mimetype, extension, strict=is_standard)
# If file extension lookup was unsuccessful, look at the content.
# Maps a MIME type to the pattern that identifies it.
# Inline flags like (?i) must be the very first thing in a pattern:
# anywhere else they are deprecated since Python 3.6 and an error
# since Python 3.11.
PARSE_CONTENTS = {
    "text/html": re.compile(r'(?i)^<(!DOCTYPE html|html|head|title)'),
    "text/plain+opera": re.compile(r'^Opera Hotlist'),
    "text/plain+chromium": re.compile(r'^{\s*"checksum":'),
    "text/plain+linkchecker": re.compile(r'(?i)^# LinkChecker URL list'),
    "application/xml+sitemapindex": re.compile(r'(?i)<\?xml[^<]+<sitemapindex\s+'),
    "application/xml+sitemap": re.compile(r'(?i)<\?xml[^<]+<urlset\s+'),
}
def guess_mimetype (filename, read=None):
    """Return the lowercased MIME type of filename, or
    'application/octet-stream' if it could not be determined.

    @param filename: name of the file; only the name is inspected here
    @param read: optional argument passed on to guess_mimetype_read()
      to look at the content of text and XML files
    """
    mime = None
    if mimedb:
        mime, _encoding = mimedb.guess_type(filename, strict=False)
    basename = os.path.basename(filename)
    if not mime:
        # Special case for Safari Bookmark files
        if basename == 'Bookmarks.plist':
            return 'application/x-plist+safari'
        # Special case for Google Chrome Bookmark files.
        if basename == 'Bookmarks':
            mime = 'text/plain'
    # Some mime types can be differentiated further with content reading.
    sniffable = ("text/plain", "application/xml", "text/xml")
    if read is not None and mime in sniffable:
        content_mime = guess_mimetype_read(read)
        if content_mime is not None:
            mime = content_mime
    if not mime:
        mime = "application/octet-stream"
    elif ";" in mime:
        # split off not needed extension info
        mime = mime.split(';', 1)[0]
    return mime.strip().lower()
def guess_mimetype_read(read):
    """Try to read some content and do a poor man's file(1).

    @param read: callable without arguments returning the content
    @return: the first MIME type of PARSE_CONTENTS whose pattern is
      found in the first 70 items of the content, or None if nothing
      matches or the content could not be read
    """
    try:
        data = read()[:70]
    except Exception:
        # best effort: unreadable content just gives no answer
        return None
    for cmime, ro in PARSE_CONTENTS.items():
        if ro.search(data):
            return cmime
    return None
def get_temp_file (mode='r', **kwargs):
    """Create a temporary file with tempfile.mkstemp().

    @param mode: mode in which the file object is opened
    @param kwargs: keyword arguments handed to tempfile.mkstemp()
    @return: tuple (open file object, filename)
    """
    handle, path = tempfile.mkstemp(**kwargs)
    fileobj = os.fdopen(handle, mode)
    return fileobj, path
def is_tty (fp):
    """Check if fp is a file object pointing to a TTY.

    Objects without an isatty() method are not TTYs.
    """
    if not hasattr(fp, "isatty"):
        return False
    return fp.isatty()
@memoized
def is_readable(filename):
    """Check if filename is a regular file the current process may read."""
    if not os.path.isfile(filename):
        return False
    return os.access(filename, os.R_OK)
def is_accessable_by_others(filename):
    """Check if file is group or world accessible.

    @return: the group and other permission bits of the file mode;
      zero if neither group nor others have any access
    """
    others_mask = stat.S_IRWXG | stat.S_IRWXO
    return os.stat(filename).st_mode & others_mask
def is_writable_by_others(filename):
    """Check if file or directory is world writable.

    @return: the write-by-others bit of the file mode; zero if it is
      not set
    """
    return os.stat(filename).st_mode & stat.S_IWOTH
@memoized
def is_writable(filename):
    """Check if
    - the file is a regular file and is writable, or
    - the file does not exist and its parent directory exists and is
      writable

    A file name without directory part is looked up in the current
    directory.

    @param filename: name of the file to check
    @return: True if the file can be written or created
    @rtype: bool
    """
    # NOTE(review): presumably memoized caches the result per filename,
    # so later permission changes may go unnoticed -- confirm in the
    # decorators module.
    if not os.path.exists(filename):
        # dirname() is empty for a bare file name such as "out.txt";
        # the parent directory is the current directory then.
        parentdir = os.path.dirname(filename) or os.curdir
        return os.path.isdir(parentdir) and os.access(parentdir, os.W_OK)
    return os.path.isfile(filename) and os.access(filename, os.W_OK)
# Set up the module-level mimedb once, when this module is imported.
init_mimedb()