mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-21 06:41:00 +00:00
Merge branch 'master' into py3
This commit is contained in:
commit
d89217efaa
40 changed files with 4802 additions and 4167 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
|
@ -29,7 +29,6 @@ Changelog.linkchecker*
|
|||
/doc/html/*.qch
|
||||
/.achievements
|
||||
/doc/*.mo
|
||||
/po/*.mo
|
||||
/LinkChecker-*-portable.zip
|
||||
/LinkChecker-*.exe
|
||||
/LinkChecker.egg-info
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
include README.txt COPYING MANIFEST.in
|
||||
include README.rst COPYING MANIFEST.in
|
||||
include config/linkchecker-completion config/create.sql
|
||||
include config/linkcheckerrc
|
||||
include config/linkchecker.apache2.conf install-rpm.sh
|
||||
|
|
@ -17,7 +17,7 @@ include linkcheck/gui/rc/Makefile
|
|||
include linkcheck/gui/rc/*.png
|
||||
include linkcheck/gui/rc/*.qrc
|
||||
include linkcheck/gui/ui/*.ui
|
||||
include po/*.po po/*.pot po/Makefile po/msgfmt.py
|
||||
include po/*.po po/*.mo po/*.pot po/Makefile
|
||||
include doc/*.example doc/*.txt
|
||||
include doc/html/*.ico
|
||||
include doc/html/*.html
|
||||
|
|
|
|||
3
Makefile
3
Makefile
|
|
@ -75,7 +75,6 @@ all:
|
|||
clean:
|
||||
-$(PYTHON) setup.py clean --all
|
||||
rm -f $(LAPPNAME)-out.* *-stamp*
|
||||
$(MAKE) -C po clean
|
||||
$(MAKE) -C doc/html clean
|
||||
$(MAKE) -C linkcheck/HtmlParser clean
|
||||
rm -f linkcheck/network/_network*.so
|
||||
|
|
@ -94,7 +93,7 @@ MANIFEST: MANIFEST.in setup.py
|
|||
$(PYTHON) setup.py sdist --manifest-only
|
||||
|
||||
locale:
|
||||
$(MAKE) -C po mofiles
|
||||
$(MAKE) -C po
|
||||
|
||||
# to build in the current directory
|
||||
localbuild: MANIFEST locale
|
||||
|
|
|
|||
|
|
@ -1,4 +1,23 @@
|
|||
9.3 "" (released xx.xx.2014)
|
||||
9.4 "" (released xx.xx.xxxx)
|
||||
|
||||
Features:
|
||||
- checking: Support itms-services: URLs.
|
||||
Closes: GH bug #532
|
||||
|
||||
Changes:
|
||||
- installation: Remove dependency on msgfmt.py by pre-generating the
|
||||
*.mo files and adding them to version control.
|
||||
Reason was the difficulty to run msgfmt.py under both Python 2 and 3.
|
||||
- checking: When checking SSL certificates under POSIX systems try
|
||||
to use the system certificate store.
|
||||
|
||||
Fixes:
|
||||
- checking: Correct typos in the proxy handling code.
|
||||
Closes: GH bug #536
|
||||
- cmdline: Reactivate paging of help pages.
|
||||
|
||||
|
||||
9.3 "Better Living Through Chemistry" (released 16.7.2014)
|
||||
|
||||
Features:
|
||||
- checking: Parse and check links in PDF files.
|
||||
|
|
@ -12,6 +31,7 @@ Changes:
|
|||
import needed third party modules.
|
||||
- checking: Treat empty URLs as same as parent URL.
|
||||
Closes: GH bug #524
|
||||
- installation: Replaced the twill dependency with local code.
|
||||
|
||||
Fixes:
|
||||
- checking: Catch XML parse errors in sitemap XML files and print them
|
||||
|
|
@ -28,6 +48,8 @@ Fixes:
|
|||
Closes: GH bug #519
|
||||
- checking: Use user-supplied authentication and proxies when requesting
|
||||
robots.txt.
|
||||
- plugins: Fix Word file check plugin.
|
||||
Closes: GH bug #530
|
||||
|
||||
|
||||
9.2 "Rick and Morty" (released 23.4.2014)
|
||||
|
|
|
|||
|
|
@ -39,9 +39,6 @@ installation is recommended.
|
|||
- *Optional, for displaying country codes:*
|
||||
Pygeoip from http://code.google.com/p/pygeoip/
|
||||
|
||||
- *Optional, used for login form submission:*
|
||||
Twill from http://twill.idyll.org/
|
||||
|
||||
|
||||
Setup for Unix/Linux
|
||||
--------------------
|
||||
|
|
|
|||
|
|
@ -73,15 +73,12 @@ First, install the required software.
|
|||
7. *Optional, used for Virus checking:*
|
||||
ClamAv from http://www.clamav.net/
|
||||
|
||||
8. *Optional, used for login form submission:*
|
||||
Twill from http://twill.idyll.org/
|
||||
|
||||
9. *Optional, for GNOME proxy setting parsing:*
|
||||
8. *Optional, for GNOME proxy setting parsing:*
|
||||
Python Gtk from http://www.pygtk.org/downloads.html
|
||||
|
||||
10. *Optional, to run the WSGI web interface:*
|
||||
Apache from http://httpd.apache.org/
|
||||
mod_wsgi from http://code.google.com/p/modwsgi/
|
||||
9. *Optional, to run the WSGI web interface:*
|
||||
Apache from http://httpd.apache.org/
|
||||
mod_wsgi from http://code.google.com/p/modwsgi/
|
||||
|
||||
|
||||
Now install the application.
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
Date: 15.7.2014
|
||||
|
||||
Porting status of dependent Python packages
|
||||
============================================
|
||||
|
||||
Date: 3.3.2014
|
||||
|
||||
OK Python
|
||||
OK requests
|
||||
OK Qt/PyQt
|
||||
|
|
@ -11,4 +11,8 @@ OK argcomplete from https://pypi.python.org/pypi/argcomplete
|
|||
OK dnspython (as dnspython3)
|
||||
OK pygeoip from https://pypi.python.org/pypi/pygeoip/
|
||||
OK Port Python Gtk stuff to PyGObject https://live.gnome.org/PyGObject/IntrospectionPorting
|
||||
TODO(optional) Twill from http://twill.idyll.org/
|
||||
|
||||
Overall Porting status
|
||||
=======================
|
||||
|
||||
NOT STARTED
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
version: "9.2"
|
||||
version: "9.3"
|
||||
name: "LinkChecker"
|
||||
lname: "linkchecker"
|
||||
maintainer: "Bastian Kleineidam"
|
||||
|
|
|
|||
|
|
@ -19,6 +19,11 @@ policy by the webmaster running the website you are checking. Look in
|
|||
the ``/robots.txt`` file which follows the
|
||||
[robots.txt exclusion standard](http://www.robotstxt.org/robotstxt.html).
|
||||
|
||||
For identification LinkChecker adds to each request a User-Agent header
|
||||
like this:
|
||||
|
||||
Mozilla/5.0 (compatible; LinkChecker/9.3; +http://wummel.github.io/linkchecker/)
|
||||
|
||||
If you yourself are the webmaster, consider allowing LinkChecker to
|
||||
check your web pages by adding the following to your robots.txt file:
|
||||
|
||||
|
|
|
|||
|
|
@ -6,7 +6,9 @@ Introduction
|
|||
LinkChecker is a free, [GPL](http://www.gnu.org/licenses/gpl-2.0.html)
|
||||
licensed website validator.
|
||||
LinkChecker checks links in web documents or full websites.
|
||||
It runs on systems with Python 2.7.2 or later.
|
||||
It runs on Python 2 systems, requiring Python 2.7.2 or later.
|
||||
Python 3 is not (yet) supported.
|
||||
|
||||
|
||||
Features
|
||||
---------
|
||||
|
|
|
|||
8
linkcheck/cache/robots_txt.py
vendored
8
linkcheck/cache/robots_txt.py
vendored
|
|
@ -17,7 +17,7 @@
|
|||
"""
|
||||
Cache robots.txt contents.
|
||||
"""
|
||||
from .. import robotparser2, configuration
|
||||
from .. import robotparser2
|
||||
from ..containers import LFUCache
|
||||
from ..decorators import synchronized
|
||||
from ..lock import get_lock
|
||||
|
|
@ -33,14 +33,14 @@ class RobotsTxt (object):
|
|||
Thread-safe cache of downloaded robots.txt files.
|
||||
format: {cache key (string) -> robots.txt content (RobotFileParser)}
|
||||
"""
|
||||
useragent = str(configuration.UserAgent)
|
||||
|
||||
def __init__ (self):
|
||||
def __init__ (self, useragent):
|
||||
"""Initialize per-URL robots.txt cache."""
|
||||
# mapping {URL -> parsed robots.txt}
|
||||
self.cache = LFUCache(size=100)
|
||||
self.hits = self.misses = 0
|
||||
self.roboturl_locks = {}
|
||||
self.useragent = useragent
|
||||
|
||||
def allows_url (self, url_data):
|
||||
"""Ask robots.txt allowance."""
|
||||
|
|
@ -59,7 +59,7 @@ class RobotsTxt (object):
|
|||
self.misses += 1
|
||||
kwargs = dict(auth=url_data.auth, session=url_data.session)
|
||||
if url_data.proxy:
|
||||
kwargs["proxies"] = {url_data.proxy_type, url_data.proxy}
|
||||
kwargs["proxies"] = {url_data.proxytype: url_data.proxy}
|
||||
rp = robotparser2.RobotFileParser(**kwargs)
|
||||
rp.set_url(roboturl)
|
||||
rp.read()
|
||||
|
|
|
|||
|
|
@ -143,6 +143,8 @@ def get_urlclass_from (scheme, assume_local_file=False):
|
|||
klass = nntpurl.NntpUrl
|
||||
elif scheme == "dns":
|
||||
klass = dnsurl.DnsUrl
|
||||
elif scheme == "itms-services":
|
||||
klass = itmsservicesurl.ItmsServicesUrl
|
||||
elif scheme and unknownurl.is_unknown_scheme(scheme):
|
||||
klass = unknownurl.UnknownUrl
|
||||
elif assume_local_file:
|
||||
|
|
@ -174,4 +176,4 @@ def get_index_html (urls):
|
|||
|
||||
# all the URL classes
|
||||
from . import (fileurl, unknownurl, ftpurl, httpurl, dnsurl,
|
||||
mailtourl, telneturl, nntpurl, ignoreurl)
|
||||
mailtourl, telneturl, nntpurl, ignoreurl, itmsservicesurl)
|
||||
|
|
|
|||
|
|
@ -47,6 +47,7 @@ ExcCacheList = [
|
|||
EOFError,
|
||||
# http errors
|
||||
requests.exceptions.RequestException,
|
||||
requests.packages.urllib3.exceptions.HTTPError,
|
||||
# ftp errors
|
||||
ftplib.error_reply,
|
||||
ftplib.error_temp,
|
||||
|
|
|
|||
|
|
@ -19,6 +19,13 @@ Handle http links.
|
|||
"""
|
||||
|
||||
import requests
|
||||
# The validity of SSL certs is ignored to be able
|
||||
# to check the URL and recurse into it.
|
||||
# The warning about invalid SSL certs is given to the
|
||||
# user instead.
|
||||
import warnings
|
||||
warnings.simplefilter('ignore', requests.packages.urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
from cStringIO import StringIO
|
||||
|
||||
from .. import (log, LOG_CHECK, strformat, mimeutil,
|
||||
|
|
|
|||
45
linkcheck/checker/itmsservicesurl.py
Normal file
45
linkcheck/checker/itmsservicesurl.py
Normal file
|
|
@ -0,0 +1,45 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2014 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
"""
|
||||
Handle itms-services URLs.
|
||||
"""
|
||||
|
||||
from . import urlbase
|
||||
from .. import log, LOG_CHECK
|
||||
|
||||
|
||||
class ItmsServicesUrl(urlbase.UrlBase):
    """Apple iOS application download URLs (itms-services: scheme)."""

    def check_syntax(self):
        """Check that the query string carries the required url= parameter."""
        super(ItmsServicesUrl, self).check_syntax()
        # urlparts[3] is the query component of the split URL
        if u"url=" not in self.urlparts[3]:
            self.set_result(_("Missing required url parameter"), valid=False)

    def local_check(self):
        """Disable content checks; itms-services URLs are not downloaded."""
        log.debug(LOG_CHECK, "Checking %s", unicode(self))

    def check_content(self):
        """Allow recursion to check the url CGI param."""
        return True

    def is_parseable(self):
        """This URL is parseable."""
        return True
|
||||
|
|
@ -18,6 +18,7 @@
|
|||
Mixin class for URLs that can be fetched over a proxy.
|
||||
"""
|
||||
import urllib
|
||||
import urlparse
|
||||
import os
|
||||
from .. import LinkCheckerError, log, LOG_CHECK, url as urlutil, httputil
|
||||
|
||||
|
|
@ -35,29 +36,30 @@ class ProxySupport (object):
|
|||
self.proxyauth = None
|
||||
if not self.proxy:
|
||||
return
|
||||
self.proxytype, self.proxy = urllib.splittype(self.proxy)
|
||||
proxyurl = urlparse.urlparse(self.proxy)
|
||||
self.proxytype = proxyurl.scheme
|
||||
if self.proxytype not in ('http', 'https'):
|
||||
# Note that invalid proxies might raise TypeError in urllib2,
|
||||
# so make sure to stop checking at this point, not later.
|
||||
msg = _("Proxy value `%(proxy)s' must start with 'http:' or 'https:'.") \
|
||||
% dict(proxy=proxy)
|
||||
raise LinkCheckerError(msg)
|
||||
self.proxy = urllib.splithost(self.proxy)[0]
|
||||
self.proxyauth, self.proxy = urllib.splituser(self.proxy)
|
||||
if self.ignore_proxy_host():
|
||||
# log proxy without auth info
|
||||
log.debug(LOG_CHECK, "ignoring proxy %r", self.proxy)
|
||||
self.add_info(_("Ignoring proxy setting `%(proxy)s'.") %
|
||||
dict(proxy=proxy))
|
||||
self.proxy = self.proxyauth = None
|
||||
self.proxy = None
|
||||
return
|
||||
log.debug(LOG_CHECK, "using proxy %r", self.proxy)
|
||||
self.add_info(_("Using proxy `%(proxy)s'.") % dict(proxy=self.proxy))
|
||||
if self.proxyauth is not None:
|
||||
if ":" not in self.proxyauth:
|
||||
self.proxyauth += ":"
|
||||
self.proxyauth = httputil.encode_base64(self.proxyauth)
|
||||
self.proxyauth = "Basic "+self.proxyauth
|
||||
self.proxyhost = proxyurl.hostname
|
||||
self.proxyport = proxyurl.port
|
||||
if proxyurl.username is not None:
|
||||
username = proxyurl.username
|
||||
password = proxyurl.password if proxy.password is not None else ""
|
||||
auth = "%s:%s" % (username, password)
|
||||
self.proxyauth = "Basic "+httputil.encode_base64(auth)
|
||||
|
||||
def ignore_proxy_host (self):
|
||||
"""Check if self.host is in the $no_proxy ignore list."""
|
||||
|
|
@ -79,7 +81,8 @@ class ProxySupport (object):
|
|||
"""
|
||||
if self.proxy:
|
||||
scheme = self.proxytype
|
||||
host, port = urlutil.splitport(self.proxy)
|
||||
host = self.proxyhost
|
||||
port = self.proxyport
|
||||
else:
|
||||
scheme = self.scheme
|
||||
host = self.host
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ from .director import console
|
|||
class LCArgumentParser(argparse.ArgumentParser):
|
||||
"""Custom argument parser to format help text."""
|
||||
|
||||
def print_help(self, file=None):
|
||||
def print_help(self, file=sys.stdout):
|
||||
"""Print a help message to stdout."""
|
||||
msg = console.encode(self.format_help())
|
||||
if fileutil.is_tty(file):
|
||||
|
|
|
|||
|
|
@ -63,7 +63,6 @@ Modules = (
|
|||
("argcomplete", u"Argcomplete"),
|
||||
("GeoIP", u"GeoIP"), # on Unix systems
|
||||
("pygeoip", u"GeoIP"), # on Windows systems
|
||||
("twill", u"Twill"),
|
||||
("sqlite3", u"Sqlite"),
|
||||
("gconf", u"Gconf"),
|
||||
("meliae", u"Meliae"),
|
||||
|
|
@ -117,6 +116,34 @@ def get_share_file (filename, devel_dir=None):
|
|||
raise ValueError(msg)
|
||||
|
||||
|
||||
def get_system_cert_file():
|
||||
"""Try to find a system-wide SSL certificate file.
|
||||
@return: the filename to the cert file
|
||||
@raises: ValueError when no system cert file could be found
|
||||
"""
|
||||
if os.name == 'posix':
|
||||
filename = "/etc/ssl/certs/ca-certificates.crt"
|
||||
if os.path.isfile(filename):
|
||||
return filename
|
||||
msg = "no system certificate file found"
|
||||
raise ValueError(msg)
|
||||
|
||||
|
||||
def get_certifi_file():
|
||||
"""Get the SSL certifications installed by the certifi package.
|
||||
@return: the filename to the cert file
|
||||
@rtype: string
|
||||
@raises: ImportError when certifi is not installed or ValueError when
|
||||
the file is not found
|
||||
"""
|
||||
import certifi
|
||||
filename = certifi.where()
|
||||
if os.path.isfile(filename):
|
||||
return filename
|
||||
msg = "%s not found; check your certifi installation" % filename
|
||||
raise ValueError(msg)
|
||||
|
||||
|
||||
# dynamic options
|
||||
class Configuration (dict):
|
||||
"""
|
||||
|
|
@ -219,7 +246,6 @@ class Configuration (dict):
|
|||
filtered_cfiles.append(cfile)
|
||||
log.debug(LOG_CHECK, "reading configuration from %s", filtered_cfiles)
|
||||
confparse.LCConfigParser(self).read(filtered_cfiles)
|
||||
self.sanitize()
|
||||
|
||||
def add_auth (self, user=None, password=None, pattern=None):
|
||||
"""Add given authentication data."""
|
||||
|
|
@ -317,12 +343,20 @@ class Configuration (dict):
|
|||
self[plugin] = {}
|
||||
|
||||
def sanitize_ssl(self):
|
||||
"""Use locally installed certificate file if available."""
|
||||
"""Use local installed certificate file if available.
|
||||
Tries to get system, then certifi, then the own
|
||||
installed certificate file."""
|
||||
if self["sslverify"] is True:
|
||||
try:
|
||||
self["sslverify"] = get_share_file('cacert.pem')
|
||||
self["sslverify"] = get_system_cert_file()
|
||||
except ValueError:
|
||||
pass
|
||||
try:
|
||||
self["sslverify"] = get_certifi_file()
|
||||
except (ValueError, ImportError):
|
||||
try:
|
||||
self["sslverify"] = get_share_file('cacert.pem')
|
||||
except ValueError:
|
||||
pass
|
||||
|
||||
|
||||
def get_plugin_folders():
|
||||
|
|
|
|||
|
|
@ -20,100 +20,18 @@ Management of checking a queue of links with several threads.
|
|||
import os
|
||||
import thread
|
||||
import time
|
||||
from .. import log, LOG_CHECK, LinkCheckerInterrupt, dummy, \
|
||||
fileutil, strformat, plugins
|
||||
from .. import log, LOG_CHECK, LinkCheckerInterrupt, plugins
|
||||
from ..cache import urlqueue, robots_txt, results
|
||||
from . import aggregator, console
|
||||
|
||||
|
||||
def visit_loginurl (aggregate):
|
||||
"""Check for a login URL and visit it."""
|
||||
config = aggregate.config
|
||||
url = config["loginurl"]
|
||||
if not url:
|
||||
return
|
||||
if not fileutil.has_module("twill"):
|
||||
msg = strformat.format_feature_warning(module=u'twill',
|
||||
feature=u'login URL visit',
|
||||
url=u'http://twill.idyll.org/')
|
||||
log.warn(LOG_CHECK, msg)
|
||||
return
|
||||
from twill import commands as tc
|
||||
log.debug(LOG_CHECK, u"Visiting login URL %s", url)
|
||||
configure_twill(tc)
|
||||
tc.go(url)
|
||||
if tc.get_browser().get_code() != 200:
|
||||
log.warn(LOG_CHECK, _("Error visiting login URL %(url)s.") % \
|
||||
{"url": url})
|
||||
return
|
||||
submit_login_form(config, url, tc)
|
||||
if tc.get_browser().get_code() != 200:
|
||||
log.warn(LOG_CHECK, _("Error posting form at login URL %(url)s.") % \
|
||||
{"url": url})
|
||||
return
|
||||
#XXX store_cookies(tc.get_browser().cj, aggregate.cookies, url)
|
||||
resulturl = tc.get_browser().get_url()
|
||||
log.debug(LOG_CHECK, u"URL after POST is %s" % resulturl)
|
||||
# add result URL to check list
|
||||
from ..checker import get_url_from
|
||||
aggregate.urlqueue.put(get_url_from(resulturl, 0, aggregate))
|
||||
|
||||
|
||||
def configure_twill (tc):
|
||||
"""Configure twill to be used by LinkChecker.
|
||||
Note that there is no need to set a proxy since twill uses the same
|
||||
ones (provided from urllib) as LinkChecker does.
|
||||
"""
|
||||
# make sure readonly controls are writeable (might be needed)
|
||||
tc.config("readonly_controls_writeable", True)
|
||||
# disable page refreshing
|
||||
tc.config("acknowledge_equiv_refresh", False)
|
||||
# fake IE 6.0 to talk sense into some sites (eg. SourceForge)
|
||||
tc.agent("Mozilla/4.0 (compatible; MSIE 6.0; Windows NT 5.0)")
|
||||
# tell twill to shut up
|
||||
tc.OUT = dummy.Dummy()
|
||||
from twill import browser
|
||||
browser.OUT = dummy.Dummy()
|
||||
# set debug level
|
||||
if log.is_debug(LOG_CHECK):
|
||||
tc.debug("http", 1)
|
||||
|
||||
|
||||
def submit_login_form (config, url, tc):
|
||||
"""Fill and submit login form."""
|
||||
user, password = config.get_user_password(url)
|
||||
cgiuser = config["loginuserfield"]
|
||||
cgipassword = config["loginpasswordfield"]
|
||||
formname = search_formname((cgiuser, cgipassword), tc)
|
||||
tc.formvalue(formname, cgiuser, user)
|
||||
tc.formvalue(formname, cgipassword, password)
|
||||
for key, value in config["loginextrafields"].items():
|
||||
tc.formvalue(formname, key, value)
|
||||
tc.submit()
|
||||
|
||||
|
||||
def search_formname (fieldnames, tc):
|
||||
"""Search form that has all given CGI fieldnames."""
|
||||
browser = tc.get_browser()
|
||||
for formcounter, form in enumerate(browser.get_all_forms()):
|
||||
for name in fieldnames:
|
||||
try:
|
||||
browser.get_form_field(form, name)
|
||||
except tc.TwillException:
|
||||
break
|
||||
else:
|
||||
return form.name or form.attrs.get('id') or formcounter
|
||||
# none found
|
||||
return None
|
||||
|
||||
|
||||
def check_urls (aggregate):
|
||||
"""Main check function; checks all configured URLs until interrupted
|
||||
with Ctrl-C.
|
||||
@return: None
|
||||
"""
|
||||
try:
|
||||
visit_loginurl(aggregate)
|
||||
aggregate.visit_loginurl()
|
||||
except Exception as msg:
|
||||
log.warn(LOG_CHECK, _("Error using login URL: %(msg)s.") % \
|
||||
dict(msg=msg))
|
||||
|
|
@ -210,7 +128,7 @@ def abort_now ():
|
|||
def get_aggregate (config):
|
||||
"""Get an aggregator instance with given configuration."""
|
||||
_urlqueue = urlqueue.UrlQueue(max_allowed_urls=config["maxnumurls"])
|
||||
_robots_txt = robots_txt.RobotsTxt()
|
||||
_robots_txt = robots_txt.RobotsTxt(config["useragent"])
|
||||
plugin_manager = plugins.PluginManager(config)
|
||||
result_cache = results.ResultCache()
|
||||
return aggregator.Aggregate(config, _urlqueue, _robots_txt, plugin_manager,
|
||||
|
|
|
|||
|
|
@ -21,10 +21,12 @@ import threading
|
|||
import thread
|
||||
import requests
|
||||
import time
|
||||
import urlparse
|
||||
import random
|
||||
from .. import log, LOG_CHECK, strformat, cookies
|
||||
from .. import log, LOG_CHECK, strformat, LinkCheckerError
|
||||
from ..decorators import synchronized
|
||||
from ..cache import urlqueue
|
||||
from ..htmlutil import formsearch
|
||||
from . import logger, status, checker, interrupt
|
||||
|
||||
|
||||
|
|
@ -32,15 +34,15 @@ _threads_lock = threading.RLock()
|
|||
_hosts_lock = threading.RLock()
|
||||
_downloadedbytes_lock = threading.RLock()
|
||||
|
||||
def new_request_session(config):
|
||||
def new_request_session(config, cookies):
|
||||
"""Create a new request session."""
|
||||
session = requests.Session()
|
||||
if cookies:
|
||||
session.cookies = cookies
|
||||
session.max_redirects = config["maxhttpredirects"]
|
||||
session.headers = {
|
||||
"User-Agent": config["useragent"],
|
||||
"DNT": "1",
|
||||
}
|
||||
# XXX proxies
|
||||
if config["cookiefile"]:
|
||||
for cookie in cookies.from_file(config["cookiefile"]):
|
||||
session.cookies = requests.cookies.merge_cookies(session.cookies, cookie)
|
||||
|
|
@ -62,11 +64,36 @@ class Aggregate (object):
|
|||
self.plugin_manager = plugin_manager
|
||||
self.result_cache = result_cache
|
||||
self.times = {}
|
||||
self.cookies = None
|
||||
requests_per_second = config["maxrequestspersecond"]
|
||||
self.wait_time_min = 1.0 / requests_per_second
|
||||
self.wait_time_max = max(self.wait_time_min + 0.5, 0.5)
|
||||
self.downloaded_bytes = 0
|
||||
|
||||
def visit_loginurl(self):
    """Check for a login URL and visit it.

    Fetches the configured login URL, fills the login form with the
    configured user/password and extra fields, posts it and stores the
    resulting session cookies on self.cookies.
    @raises: LinkCheckerError if no login form is found or if the login
      did not set any cookies
    """
    url = self.config["loginurl"]
    if not url:
        return
    user, password = self.config.get_user_password(url)
    session = requests.Session()
    # XXX user-agent header
    # XXX timeout
    response = session.get(url)
    cgiuser = self.config["loginuserfield"]
    cgipassword = self.config["loginpasswordfield"]
    form = formsearch.search_form(response.content, cgiuser, cgipassword,
                                  encoding=response.encoding)
    # search_form() returns None when no matching form exists; fail with
    # a clear error instead of an AttributeError below.
    if form is None:
        raise LinkCheckerError("No login form found at %s" % url)
    form.data[cgiuser] = user
    form.data[cgipassword] = password
    for key, value in self.config["loginextrafields"].items():
        form.data[key] = value
    # the form action may be relative to the login page URL
    formurl = urlparse.urljoin(url, form.url)
    response = session.post(formurl, data=form.data)
    self.cookies = session.cookies
    if not self.cookies:
        raise LinkCheckerError("No cookies set by login URL %s" % url)
|
||||
|
||||
@synchronized(_threads_lock)
|
||||
def start_threads (self):
|
||||
"""Spawn threads for URL checking and status printing."""
|
||||
|
|
@ -85,13 +112,13 @@ class Aggregate (object):
|
|||
self.threads.append(t)
|
||||
t.start()
|
||||
else:
|
||||
self.request_sessions[thread.get_ident()] = new_request_session(self.config)
|
||||
self.request_sessions[thread.get_ident()] = new_request_session(self.config, self.cookies)
|
||||
checker.check_urls(self.urlqueue, self.logger)
|
||||
|
||||
@synchronized(_threads_lock)
|
||||
def add_request_session(self):
|
||||
"""Add a request session for current thread."""
|
||||
session = new_request_session(self.config)
|
||||
session = new_request_session(self.config, self.cookies)
|
||||
self.request_sessions[thread.get_ident()] = session
|
||||
|
||||
@synchronized(_threads_lock)
|
||||
|
|
|
|||
113
linkcheck/htmlutil/formsearch.py
Normal file
113
linkcheck/htmlutil/formsearch.py
Normal file
|
|
@ -0,0 +1,113 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2014 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
# the Free Software Foundation; either version 2 of the License, or
|
||||
# (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
"""
|
||||
HTML form utils
|
||||
"""
|
||||
from ..HtmlParser import htmlsax
|
||||
from .. import log, LOG_CHECK
|
||||
|
||||
class Form(object):
    """Container for an HTML form's action URL and its input values."""

    def __init__(self, url):
        """Remember the form action URL and start with no field data."""
        self.url = url
        self.data = {}

    def add_value(self, key, value):
        """Store one input field value under its field name."""
        self.data[key] = value

    def __repr__(self):
        """Return unicode representation displaying URL and form data."""
        return unicode(self)

    def __unicode__(self):
        """Return a unicode string showing the URL and the form data."""
        return u"<url=%s data=%s>" % (self.url, self.data)

    def __str__(self):
        """Return the UTF-8 encoded unicode representation."""
        return unicode(self).encode('utf-8')
|
||||
|
||||
|
||||
class FormFinder(object):
    """HtmlParser handler collecting HTML forms and their input values.

    FormFinder instances are used as HtmlParser handlers; the parser
    calls start_element/start_end_element/end_element during parsing.
    """

    def __init__(self):
        """Initialize local variables."""
        super(FormFinder, self).__init__()
        # parser object will be initialized when it is used as
        # a handler object
        self.parser = None
        # list of completed Form objects
        self.forms = []
        # the form currently being parsed, or None outside <form> tags
        self.form = None

    def start_element(self, tag, attrs):
        """Open a new form on <form action=...>; record <input> values."""
        if tag == u'form':
            if u'action' in attrs:
                url = attrs['action']
                self.form = Form(url)
        elif tag == u'input':
            if self.form:
                if 'name' in attrs:
                    key = attrs['name']
                    value = attrs.get('value')
                    self.form.add_value(key, value)
                else:
                    log.warning(LOG_CHECK, "nameless form input %s" % attrs)
            else:
                log.warning(LOG_CHECK, "formless input %s" % attrs)

    def start_end_element(self, tag, attrs):
        """Delegate a combined start/end element (eg. <input .../>) to
        the start_element method. Ignore the end element part."""
        self.start_element(tag, attrs)

    def end_element(self, tag):
        """On </form>, store the finished form and reset the current one."""
        # Guard against self.form being None (form without an action
        # attribute, or an unmatched </form>), which would otherwise put
        # None into self.forms and crash later consumers.
        if tag == u'form' and self.form is not None:
            self.forms.append(self.form)
            self.form = None
||||
|
||||
|
||||
def search_form(content, cgiuser, cgipassword, encoding='utf-8'):
    """Search for a HTML form in the given HTML content that has the given
    CGI fields. If no form is found return None.
    """
    finder = FormFinder()
    saxparser = htmlsax.parser(finder)
    finder.parser = saxparser
    saxparser.encoding = encoding
    # run the parse
    saxparser.feed(content)
    saxparser.flush()
    # break cyclic references between parser and handler
    finder.parser = None
    saxparser.handler = None
    log.debug(LOG_CHECK, "Found forms %s", finder.forms)
    wanted = (cgiuser.lower(), cgipassword.lower())
    for form in finder.forms:
        for fieldname in form.data:
            if fieldname.lower() in wanted:
                return form
    # no form with a matching field name
    return None
|
||||
|
|
@ -17,7 +17,7 @@
|
|||
"""
|
||||
Main functions for link parsing
|
||||
"""
|
||||
from .. import log, LOG_CHECK, strformat
|
||||
from .. import log, LOG_CHECK, strformat, url as urlutil
|
||||
from ..htmlutil import linkparse
|
||||
from ..HtmlParser import htmlsax
|
||||
from ..bookmarks import firefox
|
||||
|
|
@ -30,6 +30,8 @@ def parse_url(url_data):
|
|||
key = "html"
|
||||
elif url_data.is_file() and firefox.has_sqlite and firefox.extension.search(url_data.url):
|
||||
key = "firefox"
|
||||
elif url_data.scheme == "itms-services":
|
||||
key = "itms_services"
|
||||
else:
|
||||
# determine parse routine according to content types
|
||||
mime = url_data.content_type
|
||||
|
|
@ -140,4 +142,13 @@ def parse_firefox (url_data):
|
|||
url_data.add_url(url, name=name)
|
||||
|
||||
|
||||
def parse_itms_services(url_data):
|
||||
"""Get "url" CGI parameter value as child URL."""
|
||||
query = url_data.urlparts[3]
|
||||
for k, v, sep in urlutil.parse_qsl(query, keep_blank_values=True):
|
||||
if k == "url":
|
||||
url_data.add_url(v)
|
||||
break
|
||||
|
||||
|
||||
from .sitemap import parse_sitemap, parse_sitemapindex
|
||||
|
|
|
|||
|
|
@ -32,7 +32,7 @@ from .. import fileutil, log, LOG_PLUGIN
|
|||
_initialized = False
|
||||
def init_win32com ():
|
||||
"""Initialize the win32com.client cache."""
|
||||
global _initialized
|
||||
global _initialized
|
||||
if _initialized:
|
||||
return
|
||||
import win32com.client
|
||||
|
|
@ -117,7 +117,8 @@ class WordParser(_ParserPlugin):
|
|||
|
||||
def check(self, url_data):
|
||||
"""Parse Word data."""
|
||||
filename = get_temp_filename()
|
||||
content = url_data.get_content()
|
||||
filename = get_temp_filename(content)
|
||||
# open word file and parse hyperlinks
|
||||
try:
|
||||
app = get_word_app()
|
||||
|
|
|
|||
|
|
@ -255,7 +255,7 @@ def url_parse_query (query, encoding=None):
|
|||
query, rest = query.rsplit('?', 1)
|
||||
append = '?'+url_parse_query(rest)+append
|
||||
l = []
|
||||
for k, v, sep in parse_qsl(query, True):
|
||||
for k, v, sep in parse_qsl(query, keep_blank_values=True):
|
||||
k = url_quote_part(k, '/-:,;')
|
||||
if v:
|
||||
v = url_quote_part(v, '/-:,;')
|
||||
|
|
@ -373,7 +373,7 @@ def collapse_segments (path):
|
|||
return path
|
||||
|
||||
|
||||
url_is_absolute = re.compile("^[a-z]+:", re.I).match
|
||||
url_is_absolute = re.compile(r"^[-\.a-z]+:", re.I).match
|
||||
|
||||
|
||||
def url_quote (url):
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
Project: LinkChecker
|
||||
Version: 9.2
|
||||
Version: 9.3
|
||||
Website-URL: http://wummel.github.io/linkchecker/
|
||||
Changelog-URL: https://github.com/wummel/linkchecker/blob/master/doc/changelog.txt
|
||||
Source-Package-URL: https://pypi.python.org/packages/source/L/LinkChecker/LinkChecker-${version}.tar.gz
|
||||
|
|
|
|||
15
po/Makefile
15
po/Makefile
|
|
@ -10,17 +10,12 @@ MYMAIL := bastian.kleineidam@web.de
|
|||
LFILE = LC_MESSAGES/$(PACKAGE).mo
|
||||
# defined language (add new languages here)
|
||||
LANGUAGES = de fr es
|
||||
MOFILES = $(wildcard *.po)
|
||||
|
||||
all:
|
||||
all: $(MOFILES)
|
||||
|
||||
mofiles:
|
||||
@for la in $(LANGUAGES); do \
|
||||
if [ ! -d $(LDIR)/$$la/LC_MESSAGES ]; then \
|
||||
mkdir -p $(LDIR)/$$la/LC_MESSAGES; \
|
||||
fi; \
|
||||
echo "Formatting language catalog $${la}:"; \
|
||||
$(MSGFMT) -c --statistics -o $(LDIR)/$$la/$(LFILE) $$la.po; \
|
||||
done
|
||||
%.mo: %.po
|
||||
$(MSGFMT) -c --statistics -o $@ $<
|
||||
|
||||
%.po: $(TEMPLATE)
|
||||
$(MSGMERGE) -U --suffix=.bak $@ $<
|
||||
|
|
@ -42,4 +37,4 @@ clean:
|
|||
@for f in $(LANGUAGES); do rm -f $(LDIR)/$$f/$(LFILE); done
|
||||
rm -f *.mo *.bak
|
||||
|
||||
.PHONY: mofiles clean
|
||||
.PHONY: clean
|
||||
|
|
|
|||
BIN
po/de.mo
Normal file
BIN
po/de.mo
Normal file
Binary file not shown.
BIN
po/es.mo
Normal file
BIN
po/es.mo
Normal file
Binary file not shown.
BIN
po/fr.mo
Normal file
BIN
po/fr.mo
Normal file
Binary file not shown.
File diff suppressed because it is too large
Load diff
210
po/msgfmt.py
210
po/msgfmt.py
|
|
@ -1,210 +0,0 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# License: Python license
|
||||
# Copyright by Martin v. Löwis <loewis@informatik.hu-berlin.de>
|
||||
# Plural forms support added by alexander smishlajev <alex@tycobka.lv>
|
||||
"""
|
||||
Generate binary message catalog from textual translation description.
|
||||
|
||||
This program converts a textual Uniforum-style message catalog (.po file) into
|
||||
a binary GNU catalog (.mo file). This is essentially the same function as the
|
||||
GNU msgfmt program, however, it is a simpler implementation.
|
||||
|
||||
Usage: msgfmt.py [OPTIONS] filename.po
|
||||
|
||||
Options:
|
||||
-o file
|
||||
--output-file=file
|
||||
Specify the output file to write to. If omitted, output will go to a
|
||||
file named filename.mo (based off the input file name).
|
||||
|
||||
-h
|
||||
--help
|
||||
Print this message and exit.
|
||||
|
||||
-V
|
||||
--version
|
||||
Display version information and exit.
|
||||
"""
|
||||
|
||||
import sys
|
||||
import os
|
||||
import getopt
|
||||
import struct
|
||||
import array
|
||||
|
||||
__version__ = "1.1"
|
||||
|
||||
MESSAGES = {}
|
||||
|
||||
|
||||
def usage (ecode, msg=''):
|
||||
"""Print usage and msg and exit with given code."""
|
||||
print >> sys.stderr, __doc__
|
||||
if msg:
|
||||
print >> sys.stderr, msg
|
||||
sys.exit(ecode)
|
||||
|
||||
|
||||
def add (msgid, transtr, fuzzy):
|
||||
"""Add a non-fuzzy translation to the dictionary."""
|
||||
if not fuzzy and transtr and not transtr.startswith('\0'):
|
||||
MESSAGES[msgid] = transtr
|
||||
|
||||
|
||||
def generate ():
|
||||
"""Return the generated output."""
|
||||
keys = MESSAGES.keys()
|
||||
# the keys are sorted in the .mo file
|
||||
keys.sort()
|
||||
offsets = []
|
||||
ids = strs = ''
|
||||
for _id in keys:
|
||||
# For each string, we need size and file offset. Each string is NUL
|
||||
# terminated; the NUL does not count into the size.
|
||||
offsets.append((len(ids), len(_id), len(strs), len(MESSAGES[_id])))
|
||||
ids += _id + '\0'
|
||||
strs += MESSAGES[_id] + '\0'
|
||||
# The header is 7 32-bit unsigned integers. We don't use hash tables, so
|
||||
# the keys start right after the index tables.
|
||||
# translated string.
|
||||
keystart = 7*4+16*len(keys)
|
||||
# and the values start after the keys
|
||||
valuestart = keystart + len(ids)
|
||||
koffsets = []
|
||||
voffsets = []
|
||||
# The string table first has the list of keys, then the list of values.
|
||||
# Each entry has first the size of the string, then the file offset.
|
||||
for o1, l1, o2, l2 in offsets:
|
||||
koffsets += [l1, o1+keystart]
|
||||
voffsets += [l2, o2+valuestart]
|
||||
offsets = koffsets + voffsets
|
||||
output = struct.pack("Iiiiiii",
|
||||
0x950412deL, # Magic
|
||||
0, # Version
|
||||
len(keys), # # of entries
|
||||
7*4, # start of key index
|
||||
7*4+len(keys)*8, # start of value index
|
||||
0, 0) # size and offset of hash table
|
||||
output += array.array("i", offsets).tostring()
|
||||
output += ids
|
||||
output += strs
|
||||
return output
|
||||
|
||||
|
||||
def make (filename, outfile):
|
||||
ID = 1
|
||||
STR = 2
|
||||
MESSAGES.clear()
|
||||
|
||||
# Compute .mo name from .po name and arguments
|
||||
if filename.endswith('.po'):
|
||||
infile = filename
|
||||
else:
|
||||
infile = filename + '.po'
|
||||
if outfile is None:
|
||||
outfile = os.path.splitext(infile)[0] + '.mo'
|
||||
|
||||
try:
|
||||
lines = open(infile).readlines()
|
||||
except IOError, msg:
|
||||
print >> sys.stderr, msg
|
||||
sys.exit(1)
|
||||
|
||||
section = None
|
||||
fuzzy = 0
|
||||
|
||||
# Parse the catalog
|
||||
msgid = msgstr = ''
|
||||
lno = 0
|
||||
for l in lines:
|
||||
lno += 1
|
||||
# If we get a comment line after a msgstr, this is a new entry
|
||||
if l[0] == '#' and section == STR:
|
||||
add(msgid, msgstr, fuzzy)
|
||||
section = None
|
||||
fuzzy = 0
|
||||
# Record a fuzzy mark
|
||||
if l[:2] == '#,' and (l.find('fuzzy') >= 0):
|
||||
fuzzy = 1
|
||||
# Skip comments
|
||||
if l[0] == '#':
|
||||
continue
|
||||
# Start of msgid_plural section, separate from singular form with \0
|
||||
if l.startswith('msgid_plural'):
|
||||
msgid += '\0'
|
||||
l = l[12:]
|
||||
# Now we are in a msgid section, output previous section
|
||||
elif l.startswith('msgid'):
|
||||
if section == STR:
|
||||
add(msgid, msgstr, fuzzy)
|
||||
section = ID
|
||||
l = l[5:]
|
||||
msgid = msgstr = ''
|
||||
# Now we are in a msgstr section
|
||||
elif l.startswith('msgstr'):
|
||||
section = STR
|
||||
l = l[6:]
|
||||
# Check for plural forms
|
||||
if l.startswith('['):
|
||||
# Separate plural forms with \0
|
||||
if not l.startswith('[0]'):
|
||||
msgstr += '\0'
|
||||
# Ignore the index - must come in sequence
|
||||
l = l[l.index(']') + 1:]
|
||||
# Skip empty lines
|
||||
l = l.strip()
|
||||
if not l:
|
||||
continue
|
||||
# XXX: Does this always follow Python escape semantics?
|
||||
l = eval(l)
|
||||
if section == ID:
|
||||
msgid += l
|
||||
elif section == STR:
|
||||
msgstr += l
|
||||
else:
|
||||
print >> sys.stderr, 'Syntax error on %s:%d' % (infile, lno), \
|
||||
'before:'
|
||||
print >> sys.stderr, l
|
||||
sys.exit(1)
|
||||
# Add last entry
|
||||
if section == STR:
|
||||
add(msgid, msgstr, fuzzy)
|
||||
|
||||
# Compute output
|
||||
output = generate()
|
||||
|
||||
try:
|
||||
open(outfile,"wb").write(output)
|
||||
except IOError,msg:
|
||||
print >> sys.stderr, msg
|
||||
|
||||
|
||||
def main ():
|
||||
try:
|
||||
opts, args = getopt.getopt(sys.argv[1:], 'hVo:',
|
||||
['help', 'version', 'output-file='])
|
||||
except getopt.error, msg:
|
||||
usage(1, msg)
|
||||
|
||||
outfile = None
|
||||
# parse options
|
||||
for opt, arg in opts:
|
||||
if opt in ('-h', '--help'):
|
||||
usage(0)
|
||||
elif opt in ('-V', '--version'):
|
||||
print >> sys.stderr, "msgfmt.py", __version__
|
||||
sys.exit(0)
|
||||
elif opt in ('-o', '--output-file'):
|
||||
outfile = arg
|
||||
# do it
|
||||
if not args:
|
||||
print >> sys.stderr, 'No input file given'
|
||||
print >> sys.stderr, "Try `msgfmt --help' for more information."
|
||||
return
|
||||
|
||||
for filename in args:
|
||||
make(filename, outfile)
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
main()
|
||||
|
|
@ -2,7 +2,6 @@
|
|||
requests>=2.2.0
|
||||
# optional:
|
||||
argcomplete
|
||||
#twill
|
||||
# for testing:
|
||||
#pytest
|
||||
#pyftpdlib
|
||||
|
|
|
|||
33
setup.py
33
setup.py
|
|
@ -99,7 +99,7 @@ except ImportError:
|
|||
has_py2app = False
|
||||
|
||||
# the application version
|
||||
AppVersion = "9.3"
|
||||
AppVersion = "9.4"
|
||||
# the application name
|
||||
AppName = "LinkChecker"
|
||||
Description = "check links in web documents or full websites"
|
||||
|
|
@ -119,8 +119,6 @@ MSVCP90Token = '1fc8b3b9a1e18e3b'
|
|||
|
||||
# basic includes for py2exe and py2app
|
||||
py_includes = ['dns.rdtypes.IN.*', 'dns.rdtypes.ANY.*',
|
||||
'twill.extensions.*', 'twill.extensions.match_parse.*',
|
||||
'twill.other_packages.*', 'twill.other_packages._mechanize_dist.*',
|
||||
'linkcheck.logger.*',
|
||||
]
|
||||
# basic excludes for py2exe and py2app
|
||||
|
|
@ -399,20 +397,13 @@ class MyInstallLib (install_lib, object):
|
|||
|
||||
|
||||
class MyInstallData (install_data, object):
|
||||
"""Handle locale files and permissions."""
|
||||
"""Fix file permissions."""
|
||||
|
||||
def run (self):
|
||||
"""Adjust permissions on POSIX systems."""
|
||||
self.add_message_files()
|
||||
super(MyInstallData, self).run()
|
||||
self.fix_permissions()
|
||||
|
||||
def add_message_files (self):
|
||||
"""Add locale message files to data_files list."""
|
||||
for (src, dst) in list_message_files(self.distribution.get_name()):
|
||||
dstdir = os.path.dirname(dst)
|
||||
self.data_files.append((dstdir, [os.path.join("build", dst)]))
|
||||
|
||||
def fix_permissions (self):
|
||||
"""Set correct read permissions on POSIX systems. Might also
|
||||
be possible by setting umask?"""
|
||||
|
|
@ -553,7 +544,7 @@ class MyBuildExt (build_ext, object):
|
|||
self.build_extension(ext)
|
||||
|
||||
|
||||
def list_message_files (package, suffix=".po"):
|
||||
def list_message_files (package, suffix=".mo"):
|
||||
"""Return list of all found message files and their installation paths."""
|
||||
for fname in glob.glob("po/*" + suffix):
|
||||
# basename (without extension) is a locale name
|
||||
|
|
@ -587,21 +578,9 @@ def check_manifest ():
|
|||
class MyBuild (build, object):
|
||||
"""Custom build command."""
|
||||
|
||||
def build_message_files (self):
|
||||
"""For each po/*.po, build .mo file in target locale directory."""
|
||||
# msgfmt.py is in the po/ subdirectory
|
||||
sys.path.append('po')
|
||||
import msgfmt
|
||||
for (src, dst) in list_message_files(self.distribution.get_name()):
|
||||
build_dst = os.path.join("build", dst)
|
||||
self.mkpath(os.path.dirname(build_dst))
|
||||
self.announce("Compiling %s -> %s" % (src, build_dst))
|
||||
msgfmt.make(src, build_dst)
|
||||
|
||||
def run (self):
|
||||
"""Check MANIFEST and build message files before building."""
|
||||
"""Check MANIFEST before building."""
|
||||
check_manifest()
|
||||
self.build_message_files()
|
||||
build.run(self)
|
||||
|
||||
|
||||
|
|
@ -670,6 +649,9 @@ data_files = [
|
|||
]),
|
||||
]
|
||||
|
||||
for (src, dst) in list_message_files(AppName):
|
||||
data_files.append((src, dst))
|
||||
|
||||
if os.name == 'posix':
|
||||
data_files.append(('share/man/man1', ['doc/en/linkchecker.1', 'doc/en/linkchecker-gui.1']))
|
||||
data_files.append(('share/man/man5', ['doc/en/linkcheckerrc.5']))
|
||||
|
|
@ -961,7 +943,6 @@ args = dict(
|
|||
# See also doc/install.txt for more detailed dependency documentation.
|
||||
#extra_requires = {
|
||||
# "IP country info": ['GeoIP'], # http://www.maxmind.com/app/python
|
||||
# "Login form": ['twill'], # http://twill.idyll.org/
|
||||
# "GNOME proxies": ['pygtk'], # http://www.pygtk.org/downloads.html
|
||||
# "Bash completion": ['argcomplete'], # https://pypi.python.org/pypi/argcomplete
|
||||
# "Memory debugging": ['meliae'], # https://launchpad.net/meliae
|
||||
|
|
|
|||
|
|
@ -15,14 +15,14 @@
|
|||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
|
||||
"""
|
||||
Test miscellaneous html tag parsing.
|
||||
Test miscellaneous html tag parsing and URL types
|
||||
"""
|
||||
from . import LinkCheckTest
|
||||
|
||||
|
||||
class TestMisc (LinkCheckTest):
|
||||
"""
|
||||
Test link checking of HTML tags.
|
||||
Test misc link types.
|
||||
"""
|
||||
|
||||
def test_misc (self):
|
||||
|
|
@ -33,3 +33,17 @@ class TestMisc (LinkCheckTest):
|
|||
|
||||
def test_archive (self):
|
||||
self.file_test("archive.html")
|
||||
|
||||
def test_itms_services(self):
|
||||
url = u"itms-services:?action=download-manifest&url=http://www.example.com/"
|
||||
resultlines = [
|
||||
u"url %s" % url,
|
||||
u"cache key %s" % url,
|
||||
u"real url %s" % url,
|
||||
u"valid",
|
||||
u"url http://www.example.com/",
|
||||
u"cache key http://www.example.com/",
|
||||
u"real url http://www.example.com/",
|
||||
u"valid",
|
||||
]
|
||||
self.direct(url, resultlines, recursionlevel=1)
|
||||
|
|
|
|||
|
|
@ -38,6 +38,7 @@ class TestConfig (unittest.TestCase):
|
|||
config = linkcheck.configuration.Configuration()
|
||||
files = [get_file("config0.ini")]
|
||||
config.read(files)
|
||||
config.sanitize()
|
||||
# checking section
|
||||
for scheme in ("http", "https", "ftp"):
|
||||
self.assertTrue(scheme in config["allowedschemes"])
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2004-2012 Bastian Kleineidam
|
||||
# Copyright (C) 2004-2014 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
# -*- coding: iso-8859-1 -*-
|
||||
# Copyright (C) 2011 Bastian Kleineidam
|
||||
# Copyright (C) 2011-2014 Bastian Kleineidam
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or modify
|
||||
# it under the terms of the GNU General Public License as published by
|
||||
|
|
|
|||
Loading…
Reference in a new issue