# -*- coding: iso-8859-1 -*- # Copyright (C) 2005-2014 Bastian Kleineidam # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. """ File and path utilities. """ import os import re import mimetypes from . import log from .logconf import LOG_CHECK mimedb = None def init_mimedb(): """Initialize the local MIME database.""" global mimedb try: mimedb = mimetypes.MimeTypes(strict=False) except Exception as msg: log.error(LOG_CHECK, "could not initialize MIME database: %s" % msg) return # For Opera bookmark files (opera6.adr) add_mimetype(mimedb, 'text/plain', '.adr') # To recognize PHP files as HTML with content check. add_mimetype(mimedb, 'application/x-httpd-php', '.php') # To recognize WML files add_mimetype(mimedb, 'text/vnd.wap.wml', '.wml') def add_mimetype(mimedb, mimetype, extension): """Add or replace a mimetype to be used with the given extension.""" # If extension is already a common type, strict=True must be used. strict = extension in mimedb.types_map[True] mimedb.add_type(mimetype, extension, strict=strict) # if file extension lookup was unsuccessful, look at the content PARSE_CONTENTS = { "text/html": re.compile(r'^(?i)<(!DOCTYPE html|html|head|title)'), "text/plain+opera": re.compile(r'^Opera Hotlist'), "text/plain+chromium": re.compile(r'^{\s*"checksum":'), "text/plain+linkchecker": re.compile(r'(?i)^# LinkChecker URL list'), "application/xml+sitemapindex": re.compile(r'(?i)<\?xml[^<]+