mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-17 22:40:33 +00:00
209 lines
6.5 KiB
Python
209 lines
6.5 KiB
Python
# -*- coding: iso-8859-1 -*-
|
|
# Copyright (C) 2005-2010 Bastian Kleineidam
|
|
#
|
|
# This program is free software; you can redistribute it and/or modify
|
|
# it under the terms of the GNU General Public License as published by
|
|
# the Free Software Foundation; either version 2 of the License, or
|
|
# (at your option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful,
|
|
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
# GNU General Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License along
|
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
|
"""
|
|
File and path utilities.
|
|
"""
|
|
|
|
import os
|
|
import re
|
|
import locale
|
|
import stat
|
|
import fnmatch
|
|
import mimetypes
|
|
|
|
|
|
def write_file (filename, content, backup=False, callback=None):
    """Overwrite a possibly existing file with new content. Do this
    in a manner that does not leave truncated or broken files behind.

    The content is first written to filename + ".tmp". An existing file
    is then renamed to filename + ".bak" and the temp file is renamed
    to filename. If writing the temp file fails, the temp file is removed
    and the original file is left untouched.

    @param filename: name of file to write
    @type filename: string
    @param content: file content to write
    @type content: string
    @param backup: if backup file should be left
    @type backup: bool
    @param callback: non-default storage function
    @type callback: None or function taking two parameters (fileobj, content)
    @raise Exception: whatever the write or the callback raised, after
        the temp file has been cleaned up
    """
    tmpname = filename + ".tmp"
    bakname = filename + ".bak"
    # first write in a temp file
    fileobj = open(tmpname, 'wb')
    try:
        try:
            if callback is None:
                fileobj.write(content)
            else:
                callback(fileobj, content)
        finally:
            # always release the handle, even when writing failed
            fileobj.close()
    except Exception:
        # a failed write must not leave a partial temp file behind
        if os.path.exists(tmpname):
            os.remove(tmpname)
        raise
    # move orig file to backup
    if os.path.exists(filename):
        os.rename(filename, bakname)
    # move temp file to orig
    os.rename(tmpname, filename)
    # remove backup (also removes a stale one left by an earlier call)
    if not backup and os.path.exists(bakname):
        os.remove(bakname)
|
|
|
|
|
|
def has_module (name):
    """Test if given module can be imported.

    @param name: (dotted) name of the module to import
    @type name: string
    @return: flag if import is successful
    @rtype: bool
    """
    try:
        # Use __import__ rather than exec-ing a generated import
        # statement, so the name is never evaluated as code.
        __import__(name)
    except ImportError:
        return False
    return True
|
|
|
|
|
|
class GlobDirectoryWalker (object):
    """A forward iterator that traverses a directory tree.

    Returns the full names of all directory entries below the start
    directory whose base name matches the pattern. Subdirectories that
    are symbolic links are not descended into.
    """

    def __init__ (self, directory, pattern="*"):
        """Set start directory and pattern matcher."""
        # directories that still have to be listed
        self.stack = [directory]
        self.pattern = pattern
        # directory whose entries are currently in self.files
        self.directory = None
        self.files = []
        self.index = 0

    def __iter__ (self):
        """Return the walker itself; it is its own iterator."""
        return self

    def __next__ (self):
        """Return the next matching filename or raise StopIteration."""
        try:
            return self[0]
        except IndexError:
            raise StopIteration

    # Python 2 name of the iterator method
    next = __next__

    def __getitem__ (self, index):
        """Search for next filename.

        The index argument is ignored; every call returns the next
        match. IndexError is raised when no directories are left.
        """
        while True:
            try:
                filename = self.files[self.index]
                self.index += 1
            except IndexError:
                # Pop next directory from stack. This effectively
                # stops the iteration if stack is empty.
                self.directory = self.stack.pop()
                self.files = os.listdir(self.directory)
                self.index = 0
            else:
                # got a filename
                fullname = os.path.join(self.directory, filename)
                if os.path.isdir(fullname) and not os.path.islink(fullname):
                    self.stack.append(fullname)
                if fnmatch.fnmatch(filename, self.pattern):
                    return fullname


# alias
rglob = GlobDirectoryWalker
|
|
|
|
|
|
class Buffer (object):
    """Collects written chunks and hands them out again on flush."""

    def __init__ (self, empty=''):
        """Create an empty buffer.

        @param empty: the empty value of the buffered data type; it is
            also used to join the written chunks
        """
        self.empty = empty
        self.buf = empty
        # chunks written since the last flush
        self.tmpbuf = []
        # NOTE(review): pos is only ever increased by write() and is not
        # reduced by flush(), so it counts everything written so far -
        # confirm that callers expect this.
        self.pos = 0

    def __len__ (self):
        """Return the amount of data written to the buffer."""
        return self.pos

    def write (self, data):
        """Append a chunk of data."""
        self.tmpbuf.append(data)
        self.pos = self.pos + len(data)

    def flush (self, overlap=0):
        """Return the buffered data and drop it from the buffer.

        @param overlap: if non-zero and smaller than the amount of
            written data, that many trailing items are held back in
            the buffer instead of being returned
        @return: the flushed data
        """
        self.buf += self.empty.join(self.tmpbuf)
        self.tmpbuf = []
        whole = self.buf
        keep_tail = overlap and overlap < self.pos
        if not keep_tail:
            # hand out everything
            self.buf = self.empty
            return whole
        # hold back the tail for the next flush
        self.buf = whole[-overlap:]
        return whole[:-overlap]
|
|
|
|
|
|
def get_mtime (filename):
    """Return modification time of filename or zero on errors.

    @param filename: name of the file to examine
    @type filename: string
    @return: the ST_MTIME entry of the stat result, or 0 if the file
        could not be stat'ed
    """
    try:
        info = os.stat(filename)
    except os.error:
        return 0
    return info[stat.ST_MTIME]
|
|
|
|
|
|
def get_size (filename):
    """Return file size in Bytes, or -1 on error.

    @param filename: name of the file to examine
    @type filename: string
    @return: the ST_SIZE entry of the stat result, or -1 if the file
        could not be stat'ed
    """
    try:
        info = os.stat(filename)
    except os.error:
        return -1
    return info[stat.ST_SIZE]
|
|
|
|
|
|
# Select the encoding used for filenames from the GLib environment
# variables, see
# http://developer.gnome.org/doc/API/2.0/glib/glib-running.html
if "G_FILENAME_ENCODING" not in os.environ:
    if "G_BROKEN_FILENAMES" in os.environ:
        # filenames are in the encoding of the current locale
        FSCODING = locale.getpreferredencoding()
    else:
        FSCODING = "utf-8"
else:
    # only the first entry of the comma separated list is used
    FSCODING = os.environ["G_FILENAME_ENCODING"].split(",")[0]
    if FSCODING == "@locale":
        # placeholder for the encoding of the current locale
        FSCODING = locale.getpreferredencoding()
|
|
|
|
def pathencode (path):
    """Encode a unicode path with FSCODING if the platform needs it.

    @param path: the path to encode
    @type path: string or unicode
    @return: the path encoded with FSCODING (unencodable characters are
        replaced) if it is a unicode object and os.path reports no
        support for unicode filenames; else the unchanged path
    """
    if not isinstance(path, unicode) or os.path.supports_unicode_filenames:
        # nothing to encode
        return path
    return path.encode(FSCODING, "replace")
|
|
|
|
|
|
# cache for modified check {absolute filename -> mtime}
_mtime_cache = {}


def has_changed (filename):
    """Check if filename has changed since the last check. If this
    is the first check, assume the file is changed.

    @param filename: name of the file to check
    @type filename: string
    @return: True on the first check of a file or if its modification
        time is newer than the one seen by the previous check
    @rtype: bool
    """
    key = os.path.abspath(filename)
    mtime = get_mtime(key)
    first_check = key not in _mtime_cache
    changed = first_check or mtime > _mtime_cache[key]
    # Remember the mtime seen by this check. Without this update a file
    # that changed once would be reported as changed on every later call.
    _mtime_cache[key] = mtime
    return changed
|
|
|
|
|
|
mimedb = mimetypes.MimeTypes(strict=False)

# if file extension lookup was unsuccessful, look at the content
# Note: inline flags like (?i) have to be the first thing in a pattern.
PARSE_CONTENTS = {
    "text/html": re.compile(r'(?i)^<(!DOCTYPE html|html|head|title)'),
    "text/plain+opera": re.compile(r'^Opera Hotlist'),
    "text/plain+linkchecker": re.compile(r'(?i)^# LinkChecker URL list'),
}


def guess_mimetype (filename, read=None):
    """Return MIME type of file, or 'application/octet-stream' if it could
    not be determined.

    @param filename: name of the file; used for the extension lookup
    @type filename: string
    @param read: if given, it is called to get the file content when
        the extension lookup yields text/plain or nothing; the first
        30 items of the content are matched against PARSE_CONTENTS
    @type read: None or function taking no parameters
    @return: lowercased MIME type without parameters
    @rtype: string
    """
    mime = mimedb.guess_type(filename, strict=False)[0]
    # Mime type text/plain can be differentiated further with content reading.
    if (mime == "text/plain" or not mime) and read is not None:
        # try to read some content and do a poor man's file(1)
        # XXX replace with file(1) on Unix systems
        try:
            data = read()[:30]
            # Use a separate loop variable so that the extension based
            # guess survives when no content pattern matches.
            for content_mime, ro in PARSE_CONTENTS.items():
                if ro.search(data):
                    mime = content_mime
                    break
        except Exception:
            # Content sniffing is best effort only: on any read or
            # match error keep the extension based guess.
            pass
    if not mime:
        mime = "application/octet-stream"
    elif ";" in mime:
        # split off not needed extension info
        mime = mime.split(';')[0]
    return mime.strip().lower()
|