From 0ee4414a60d6fd1e12561f59d0ae12f0da1cd506 Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Tue, 31 Mar 2020 19:46:31 +0100 Subject: [PATCH 1/6] Replace memoized with functools.lru_cache --- linkcheck/configuration/__init__.py | 4 +-- linkcheck/decorators.py | 28 --------------- linkcheck/fileutil.py | 6 ++-- tests/__init__.py | 53 ++++++++--------------------- 4 files changed, 19 insertions(+), 72 deletions(-) diff --git a/linkcheck/configuration/__init__.py b/linkcheck/configuration/__init__.py index c340cb7d..c2951c04 100644 --- a/linkcheck/configuration/__init__.py +++ b/linkcheck/configuration/__init__.py @@ -18,6 +18,7 @@ Store metadata and options. """ +from functools import lru_cache import os import re try: # Python 3 @@ -31,7 +32,6 @@ import socket import _LinkChecker_configdata as configdata from .. import (log, LOG_CHECK, get_install_data, fileutil) from . import confparse -from ..decorators import memoized from xdg.BaseDirectory import xdg_config_home, xdg_data_home Version = configdata.version @@ -576,7 +576,7 @@ def get_kde_home_dir (): loc_ro = re.compile(r"\[.*\]$") -@memoized +@lru_cache(1) def read_kioslaverc (kde_config_dir): """Read kioslaverc into data dictionary.""" data = {} diff --git a/linkcheck/decorators.py b/linkcheck/decorators.py index 1831537f..05cb596b 100644 --- a/linkcheck/decorators.py +++ b/linkcheck/decorators.py @@ -137,34 +137,6 @@ def timed (log=sys.stderr, limit=2.0): return lambda func: timeit(func, log, limit) -class memoized (object): - """Decorator that caches a function's return value each time it is called. - If called later with the same arguments, the cached value is returned, and - not re-evaluated.""" - - def __init__(self, func): - """Store function and initialize the cache.""" - self.func = func - self.cache = {} - - def __call__(self, *args): - """Lookup and return cached result if found. 
Else call stored - function with given arguments.""" - try: - return self.cache[args] - except KeyError: - self.cache[args] = value = self.func(*args) - return value - except TypeError: - # uncachable -- for instance, passing a list as an argument. - # Better to not cache than to blow up entirely. - return self.func(*args) - - def __repr__(self): - """Return the function's docstring.""" - return self.func.__doc__ - - class curried (object): """Decorator that returns a function that keeps returning functions until all arguments are supplied; then the original function is diff --git a/linkcheck/fileutil.py b/linkcheck/fileutil.py index 8dd05a50..058a5c99 100644 --- a/linkcheck/fileutil.py +++ b/linkcheck/fileutil.py @@ -25,9 +25,9 @@ import fnmatch import tempfile import importlib from distutils.spawn import find_executable +from functools import lru_cache from builtins import str as str_text -from .decorators import memoized def write_file (filename, content, backup=False, callback=None): """Overwrite a possibly existing file with new content. 
Do this @@ -197,7 +197,7 @@ def is_tty (fp): return (hasattr(fp, "isatty") and fp.isatty()) -@memoized +@lru_cache(128) def is_readable(filename): """Check if file is a regular file and is readable.""" return os.path.isfile(filename) and os.access(filename, os.R_OK) @@ -215,7 +215,7 @@ def is_writable_by_others(filename): return mode & stat.S_IWOTH -@memoized +@lru_cache(128) def is_writable(filename): """Check if - the file is a regular file and is writable, or diff --git a/tests/__init__.py b/tests/__init__.py index d638a6d7..005b1f39 100644 --- a/tests/__init__.py +++ b/tests/__init__.py @@ -23,7 +23,7 @@ import sys import socket import pytest from contextlib import contextmanager -from functools import wraps +from functools import lru_cache, wraps from linkcheck import LinkCheckerInterrupt from builtins import str as str_text @@ -32,31 +32,6 @@ basedir = os.path.dirname(__file__) linkchecker_cmd = os.path.join(os.path.dirname(basedir), "linkchecker") -class memoized (object): - """Decorator that caches a function's return value each time it is called. - If called later with the same arguments, the cached value is returned, and - not re-evaluated.""" - - def __init__(self, func): - self.func = func - self.cache = {} - - def __call__(self, *args): - try: - return self.cache[args] - except KeyError: - self.cache[args] = value = self.func(*args) - return value - except TypeError: - # uncachable -- for instance, passing a list as an argument. - # Better to not cache than to blow up entirely. - return self.func(*args) - - def __repr__(self): - """Return the function's docstring.""" - return self.func.__doc__ - - def run (cmd, verbosity=0, **kwargs): """Run command without error checking. 
@return: command return code""" @@ -97,7 +72,7 @@ def _need_func (testfunc, name): return check_func -@memoized +@lru_cache(1) def has_network (): """Test if network is up.""" try: @@ -112,7 +87,7 @@ def has_network (): need_network = _need_func(has_network, "network") -@memoized +@lru_cache(1) def has_msgfmt (): """Test if msgfmt is available.""" return run_silent(["msgfmt", "-V"]) == 0 @@ -120,7 +95,7 @@ def has_msgfmt (): need_msgfmt = _need_func(has_msgfmt, "msgfmt") -@memoized +@lru_cache(1) def has_posix (): """Test if this is a POSIX system.""" return os.name == "posix" @@ -128,7 +103,7 @@ def has_posix (): need_posix = _need_func(has_posix, "POSIX system") -@memoized +@lru_cache(1) def has_windows (): """Test if this is a Windows system.""" return os.name == "nt" @@ -136,7 +111,7 @@ def has_windows (): need_windows = _need_func(has_windows, "Windows system") -@memoized +@lru_cache(1) def has_linux (): """Test if this is a Linux system.""" return sys.platform.startswith("linux") @@ -144,7 +119,7 @@ def has_linux (): need_linux = _need_func(has_linux, "Linux system") -@memoized +@lru_cache(1) def has_clamav (): """Test if ClamAV daemon is installed and running.""" try: @@ -162,7 +137,7 @@ def has_clamav (): need_clamav = _need_func(has_clamav, "ClamAV") -@memoized +@lru_cache(1) def has_proxy (): """Test if proxy is running on port 8081.""" try: @@ -176,7 +151,7 @@ def has_proxy (): need_proxy = _need_func(has_proxy, "proxy") -@memoized +@lru_cache(1) def has_pyftpdlib (): """Test if pyftpdlib is available.""" try: @@ -188,7 +163,7 @@ def has_pyftpdlib (): need_pyftpdlib = _need_func(has_pyftpdlib, "pyftpdlib") -@memoized +@lru_cache(1) def has_biplist (): """Test if biplist is available.""" try: @@ -200,7 +175,7 @@ def has_biplist (): need_biplist = _need_func(has_biplist, "biplist") -@memoized +@lru_cache(1) def has_newsserver (server): import nntplib try: @@ -224,7 +199,7 @@ def need_newsserver (server): -@memoized +@lru_cache(1) def has_x11 (): """Test if 
DISPLAY variable is set.""" return os.getenv('DISPLAY') is not None @@ -232,7 +207,7 @@ def has_x11 (): need_x11 = _need_func(has_x11, 'X11') -@memoized +@lru_cache(1) def has_word(): """Test if Word is available.""" from linkcheck.plugins import parseword @@ -241,7 +216,7 @@ def has_word(): need_word = _need_func(has_word, 'Word') -@memoized +@lru_cache(1) def has_pdflib(): from linkcheck.plugins import parsepdf return parsepdf.has_pdflib From 2eb1424703522732493baaeb628196a0e335d79f Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Tue, 31 Mar 2020 19:46:31 +0100 Subject: [PATCH 2/6] Replace deprecated plistlib.readPlistFromBytes() in bookmarks.safari Remove Python 2 code. plistlib.loads() was added in Python 3.4. --- linkcheck/bookmarks/safari.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/linkcheck/bookmarks/safari.py b/linkcheck/bookmarks/safari.py index 6baab726..2e81025d 100644 --- a/linkcheck/bookmarks/safari.py +++ b/linkcheck/bookmarks/safari.py @@ -83,10 +83,7 @@ def get_plist_data_from_string (data): return biplist.readPlistFromString(data) # fall back to normal plistlist try: - if hasattr(plistlib, 'readPlistFromBytes'): # Python 3 - return plistlib.readPlistFromBytes(data) - else: - return plistlib.readPlistFromString(data) + return plistlib.loads(data) except Exception: # not parseable (eg. not well-formed, or binary) return {} From 504004d4f09b609d119b0e531fb73a8612566eb3 Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Tue, 31 Mar 2020 19:46:31 +0100 Subject: [PATCH 3/6] Use ipaddress in network.iputil.is_valid_ip() ipaddress was introduced in Python 3.3. --- linkcheck/network/iputil.py | 45 ++++--------------------------------- 1 file changed, 4 insertions(+), 41 deletions(-) diff --git a/linkcheck/network/iputil.py b/linkcheck/network/iputil.py index c6c7c965..ef7b6e02 100644 --- a/linkcheck/network/iputil.py +++ b/linkcheck/network/iputil.py @@ -18,55 +18,18 @@ Ip number related utility functions. 
""" +import ipaddress import re import socket from .. import log, LOG_CHECK - -# IP Adress regular expressions -# Note that each IPv4 octet can be encoded in dezimal, hexadezimal and octal. -_ipv4_num = r"\d{1,3}" -# XXX -_ipv4_num_4 = r"%s\.%s\.%s\.%s" % ((_ipv4_num,) * 4) -_ipv4_re = re.compile(r"^%s$" % _ipv4_num_4) - - -# IPv6; See also rfc2373 -_ipv6_num = r"[\da-f]{1,4}" -_ipv6_re = re.compile(r"^%s:%s:%s:%s:%s:%s:%s:%s$" % ((_ipv6_num,) * 8)) -_ipv6_ipv4_re = re.compile(r"^%s:%s:%s:%s:%s:%s:" % ((_ipv6_num,) * 6) + \ - r"%s$" % _ipv4_num_4) -_ipv6_abbr_re = re.compile(r"^((%s:){0,6}%s)?::((%s:){0,6}%s)?$" % \ - ((_ipv6_num,) * 4)) -_ipv6_ipv4_abbr_re = re.compile(r"^((%s:){0,4}%s)?::((%s:){0,5})?" % \ - ((_ipv6_num,) * 3) + \ - "%s$" % _ipv4_num_4) - - def is_valid_ip (ip): """ Return True if given ip is a valid IPv4 or IPv6 address. """ - return is_valid_ipv4(ip) or is_valid_ipv6(ip) - - -def is_valid_ipv4 (ip): - """ - Return True if given ip is a valid IPv4 address. - """ - if not _ipv4_re.match(ip): - return False - a, b, c, d = [int(i) for i in ip.split(".")] - return a <= 255 and b <= 255 and c <= 255 and d <= 255 - - -def is_valid_ipv6 (ip): - """ - Return True if given ip is a valid IPv6 address. 
- """ - # XXX this is not complete: check ipv6 and ipv4 semantics too here - if not (_ipv6_re.match(ip) or _ipv6_ipv4_re.match(ip) or - _ipv6_abbr_re.match(ip) or _ipv6_ipv4_abbr_re.match(ip)): + try: + ipaddress.ip_address(ip) + except ValueError: return False return True From c3860e2218a49fa9e6d10f21b2728eac3b0d0229 Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Tue, 31 Mar 2020 19:46:31 +0100 Subject: [PATCH 4/6] Remove third_party directory from MANIFEST.in Unused since: 0a13fae3 ("remove third party packages and use them as dependency", 2018-01-06) --- MANIFEST.in | 1 - 1 file changed, 1 deletion(-) diff --git a/MANIFEST.in b/MANIFEST.in index e337532c..169644a1 100644 --- a/MANIFEST.in +++ b/MANIFEST.in @@ -23,5 +23,4 @@ include doc/Makefile include doc/examples/*.sh doc/examples/*.bat doc/examples/*.py include doc/examples/linkcheckerrc_loginurl include scripts/*.sh scripts/*.py -recursive-include third_party *.py *.txt Makefile example* recursive-include tests *.py *.result *.html *.ico *.txt *.zip *.asc *.css *.xhtml *.sqlite *.adr *.swf From e7c5f353cd68b8249cb0d0f21d4de112b5e24625 Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Tue, 31 Mar 2020 19:46:31 +0100 Subject: [PATCH 5/6] Remove unused function linkcheck.fileutil.write_file() Doesn't appear to have ever been used. Causes flake8 error: linkcheck/fileutil.py:45:9: F821 undefined name 'file' --- linkcheck/fileutil.py | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/linkcheck/fileutil.py b/linkcheck/fileutil.py index 058a5c99..666a2c6a 100644 --- a/linkcheck/fileutil.py +++ b/linkcheck/fileutil.py @@ -29,35 +29,6 @@ from functools import lru_cache from builtins import str as str_text -def write_file (filename, content, backup=False, callback=None): - """Overwrite a possibly existing file with new content. Do this - in a manner that does not leave truncated or broken files behind. 
- @param filename: name of file to write - @type filename: string - @param content: file content to write - @type content: string - @param backup: if backup file should be left - @type backup: bool - @param callback: non-default storage function - @type callback: None or function taking two parameters (fileobj, content) - """ - # first write in a temp file - f = file(filename+".tmp", 'wb') - if callback is None: - f.write(content) - else: - callback(f, content) - f.close() - # move orig file to backup - if os.path.exists(filename): - os.rename(filename, filename+".bak") - # move temp file to orig - os.rename(filename+".tmp", filename) - # remove backup - if not backup and os.path.exists(filename+".bak"): - os.remove(filename+".bak") - - def has_module (name, without_error=True): """Test if given module can be imported. @param without_error: True if module must not throw any errors when importing From d2cb1b9dd61e251267bb163191525f35e189a50b Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Tue, 31 Mar 2020 19:46:31 +0100 Subject: [PATCH 6/6] Raise minimum Python version to 3.5 in setup.py Also fix the version guard: hasattr() was inside the negated group, so the check could never trigger. --- setup.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/setup.py b/setup.py index 3afca9a7..5c223bf9 100755 --- a/setup.py +++ b/setup.py @@ -31,8 +31,8 @@ Change it very carefully. from __future__ import print_function import sys -if not (hasattr(sys, 'version_info') or - sys.version_info < (2, 7, 0, 'final', 0)): - raise SystemExit("This program requires Python 2.7 or later.") +if (not hasattr(sys, 'version_info') or + sys.version_info < (3, 5, 0, 'final', 0)): + raise SystemExit("This program requires Python 3.5 or later.") import os import re import codecs