Merge pull request #123 from PetrDlouhy/python3-easy

Add Python3 support - easiest changes
This commit is contained in:
anarcat 2018-01-23 14:18:03 -05:00 committed by GitHub
commit b3cc3c1911
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 117 additions and 85 deletions

View file

@@ -83,7 +83,10 @@ def get_plist_data_from_string (data):
return biplist.readPlistFromString(data)
# fall back to normal plistlist
try:
return plistlib.readPlistFromString(data)
if hasattr(plistlib, 'readPlistFromBytes'): # Python 3
return plistlib.readPlistFromBytes(data)
else:
return plistlib.readPlistFromString(data)
except Exception:
# not parseable (eg. not well-formed, or binary)
return {}

View file

@@ -17,12 +17,14 @@
"""
Mixin class for URLs that can be fetched over a proxy.
"""
import urllib
try:
import urlparse
try: # Python 3
from urllib import parse
from urllib import request
from urllib.parse import splitport
except ImportError:
# Python 3
from urllib import parse as urlparse
from urllib import splitport
import urllib as request
import urlparse as parse
import os
from .. import LinkCheckerError, log, LOG_CHECK, url as urlutil, httputil
@@ -40,7 +42,7 @@ class ProxySupport (object):
self.proxyauth = None
if not self.proxy:
return
proxyurl = urlparse.urlparse(self.proxy)
proxyurl = parse.urlparse(self.proxy)
self.proxytype = proxyurl.scheme
if self.proxytype not in ('http', 'https'):
# Note that invalid proxies might raise TypeError in urllib2,
@@ -67,7 +69,7 @@ class ProxySupport (object):
def ignore_proxy_host (self):
"""Check if self.host is in the $no_proxy ignore list."""
if urllib.proxy_bypass(self.host):
if request.proxy_bypass(self.host):
return True
no_proxy = os.environ.get("no_proxy")
if no_proxy:
@@ -96,7 +98,7 @@ class ProxySupport (object):
def parse_host_port (host_port):
"""Parse a host:port string into separate components."""
host, port = urllib.splitport(host_port.strip())
host, port = splitport(host_port.strip())
if port is not None:
if urlutil.is_numeric_port(port):
port = int(port)

View file

@@ -20,12 +20,12 @@ Store metadata and options.
import os
import re
import urllib
try:
import urlparse
except ImportError:
# Python 3
from urllib import parse as urlparse
try: # Python 3
from urllib import parse
from urllib import request
except ImportError: # Python 2
import urlparse as parse
import urllib as request
import shutil
import socket
import _LinkChecker_configdata as configdata
@@ -174,7 +174,7 @@ class Configuration (dict):
self["maxrequestspersecond"] = 10
self["maxhttpredirects"] = 10
self["nntpserver"] = os.environ.get("NNTP_SERVER", None)
self["proxy"] = urllib.getproxies()
self["proxy"] = request.getproxies()
self["sslverify"] = True
self["threads"] = 10
self["timeout"] = 60
@@ -319,7 +319,7 @@ class Configuration (dict):
if not url.lower().startswith(("http:", "https:")):
log.warn(LOG_CHECK, _("login URL is not a HTTP URL."))
disable = True
urlparts = urlparse.urlsplit(url)
urlparts = parse.urlsplit(url)
if not urlparts[0] or not urlparts[1] or not urlparts[2]:
log.warn(LOG_CHECK, _("login URL is incomplete."))
disable = True

View file

@@ -16,7 +16,10 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""Parse configuration files"""
import ConfigParser
try: # Python 3
from configparser import RawConfigParser
except ImportError: # Python 2
from ConfigParser import RawConfigParser
import os
from .. import LinkCheckerError, get_link_pat, LOG_CHECK, log, fileutil, plugins, logconf
@@ -30,7 +33,7 @@ def read_multiline (value):
yield line
class LCConfigParser (ConfigParser.RawConfigParser, object):
class LCConfigParser (RawConfigParser, object):
"""
Parse a LinkChecker configuration file.
"""

View file

@@ -18,8 +18,14 @@
Parsing of cookies.
"""
import cookielib
import httplib
try: # Python 3
from http.cookiejar import split_header_words
except ImportError: # Python 2
from cookielib import split_header_words
try: # Python 3
from http.client import HTTPMessage
except ImportError: # Python 2
from httplib import HTTPMessage
import requests
from cStringIO import StringIO
@@ -53,14 +59,14 @@ def from_headers (strheader):
"""
res = []
fp = StringIO(strheader)
headers = httplib.HTTPMessage(fp, seekable=True)
headers = HTTPMessage(fp, seekable=True)
if "Host" not in headers:
raise ValueError("Required header 'Host:' missing")
host = headers["Host"]
path= headers.get("Path", "/")
for header in headers.getallmatchingheaders("Set-Cookie"):
headervalue = header.split(':', 1)[1]
for pairs in cookielib.split_header_words([headervalue]):
for pairs in split_header_words([headervalue]):
for name, value in pairs:
cookie = requests.cookies.create_cookie(name, value,
domain=host, path=path)

View file

@@ -18,7 +18,10 @@
Management of checking a queue of links with several threads.
"""
import os
import thread
try: # Python 3
from _thread import error as thread_error
except ImportError: # Python 2
from thread import error as thread_error
import time
from .. import log, LOG_CHECK, LinkCheckerInterrupt, plugins
from ..cache import urlqueue, robots_txt, results
@@ -52,7 +55,7 @@ def check_urls (aggregate):
raise
except KeyboardInterrupt:
interrupt(aggregate)
except thread.error:
except thread_error:
log.warn(LOG_CHECK,
_("Could not start a new thread. Check that the current user" \
" is allowed to start new threads."))

View file

@@ -18,14 +18,17 @@
Aggregate needed object instances for checker threads.
"""
import threading
import thread
try: # Python 3
import _thread
except ImportError:
import thread as _thread
import requests
import time
try:
import urlparse
try: # Python 3
from urllib import parse
except ImportError:
# Python 3
from urllib import parse as urlparse
import urlparse as parse
import random
from .. import log, LOG_CHECK, strformat, LinkCheckerError
from ..decorators import synchronized
@@ -92,7 +95,7 @@ class Aggregate (object):
form.data[cgipassword] = password
for key, value in self.config["loginextrafields"].items():
form.data[key] = value
formurl = urlparse.urljoin(url, form.url)
formurl = parse.urljoin(url, form.url)
response = session.post(formurl, data=form.data)
self.cookies = session.cookies
if len(self.cookies) == 0:
@@ -116,19 +119,19 @@ class Aggregate (object):
self.threads.append(t)
t.start()
else:
self.request_sessions[thread.get_ident()] = new_request_session(self.config, self.cookies)
self.request_sessions[_thread.get_ident()] = new_request_session(self.config, self.cookies)
checker.check_urls(self.urlqueue, self.logger)
@synchronized(_threads_lock)
def add_request_session(self):
"""Add a request session for current thread."""
session = new_request_session(self.config, self.cookies)
self.request_sessions[thread.get_ident()] = session
self.request_sessions[_thread.get_ident()] = session
@synchronized(_threads_lock)
def get_request_session(self):
"""Get the request session for current thread."""
return self.request_sessions[thread.get_ident()]
return self.request_sessions[_thread.get_ident()]
@synchronized(_hosts_lock)
def wait_for_host(self, host):

View file

@@ -16,7 +16,10 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
"""Logger for aggregator instances"""
import threading
import thread
try: # Python 3
import _thread
except ImportError: # Python 2
import thread as _thread
from ..decorators import synchronized
_lock = threading.Lock()
@@ -75,4 +78,4 @@ class Logger (object):
if logger.is_active:
break
else:
thread.interrupt_main()
_thread.interrupt_main()

View file

@@ -14,7 +14,10 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
import thread
try: # Python 3
import _thread
except ImportError: # Python 2
import thread as _thread
from ..decorators import notimplemented
from .. import threader
from . import console
@@ -28,7 +31,7 @@ class CheckedTask (threader.StoppableThread):
try:
self.run_checked()
except KeyboardInterrupt:
thread.interrupt_main()
_thread.interrupt_main()
except Exception:
self.internal_error()

View file

@@ -179,7 +179,7 @@ class GzipFile:
self.fileobj.write(fname + '\000')
def _init_read(self):
self.crc = zlib.crc32("") & 0xffffffffL
self.crc = zlib.crc32("") & 0xffffffff
self.size = 0
def _read_gzip_header(self):
@@ -226,7 +226,7 @@ class GzipFile:
raise ValueError, "write() on closed GzipFile object"
if len(data) > 0:
self.size = self.size + len(data)
self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
self.crc = zlib.crc32(data, self.crc) & 0xffffffff
self.fileobj.write( self.compress.compress(data) )
self.offset += len(data)
return len(data)
@@ -325,7 +325,7 @@ class GzipFile:
self._new_member = True
def _add_read_data(self, data):
self.crc = zlib.crc32(data, self.crc) & 0xffffffffL
self.crc = zlib.crc32(data, self.crc) & 0xffffffff
self.extrabuf = self.extrabuf + data
self.extrasize = self.extrasize + len(data)
self.size = self.size + len(data)
@@ -342,7 +342,7 @@ class GzipFile:
if crc32 != self.crc:
raise IOError("CRC check failed %s != %s" % (hex(crc32),
hex(self.crc)))
elif isize != (self.size & 0xffffffffL):
elif isize != (self.size & 0xffffffff):
raise IOError, "Incorrect length of data produced"
# Gzip files can be padded with zeroes and still have archives.
@@ -365,7 +365,7 @@ class GzipFile:
self.fileobj.write(self.compress.flush())
write32u(self.fileobj, self.crc)
# self.size may exceed 2GB, or even 4GB
write32u(self.fileobj, self.size & 0xffffffffL)
write32u(self.fileobj, self.size & 0xffffffff)
self.fileobj = None
elif self.mode == READ:
self.fileobj = None

View file

@@ -20,20 +20,25 @@ Robots.txt parser.
The robots.txt Exclusion Protocol is implemented as specified in
http://www.robotstxt.org/wc/norobots-rfc.html
"""
try:
import urlparse
except ImportError:
# Python 3
from urllib import parse as urlparse
import urllib
try: # Python 3
from urllib import parse
except ImportError: # Python 2
import urllib as parse
try: # Python 3
from urllib.parse import urlparse
except ImportError: # Python 2
from urlparse import urlparse
import time
import requests
from . import log, LOG_CHECK, configuration
__all__ = ["RobotFileParser"]
ACCEPT_ENCODING = 'x-gzip,gzip,deflate'
class RobotFileParser (object):
"""This class provides a set of methods to read, parse and answer
questions about a single robots.txt file."""
@@ -79,7 +84,7 @@ class RobotFileParser (object):
def set_url (self, url):
"""Set the URL referring to a robots.txt file."""
self.url = url
self.host, self.path = urlparse.urlparse(url)[1:3]
self.host, self.path = urlparse(url)[1:3]
def read (self):
"""Read the robots.txt URL and feeds it to the parser."""
@@ -162,7 +167,7 @@ class RobotFileParser (object):
line = line.split(':', 1)
if len(line) == 2:
line[0] = line[0].strip().lower()
line[1] = urllib.unquote(line[1].strip())
line[1] = parse.unquote(line[1].strip())
if line[0] == "user-agent":
if state == 2:
log.debug(LOG_CHECK, "%r line %d: missing blank line before user-agent directive", self.url, linenumber)
@@ -230,7 +235,7 @@ class RobotFileParser (object):
return True
# search for given user agent matches
# the first match counts
url = urllib.quote(urlparse.urlparse(urllib.unquote(url))[2]) or "/"
url = parse.quote(urlparse(parse.unquote(url))[2]) or "/"
for entry in self.entries:
if entry.applies_to(useragent):
return entry.allowance(url)
@@ -276,7 +281,7 @@ class RuleLine (object):
# an empty value means allow all
allowance = True
path = '/'
self.path = urllib.quote(path)
self.path = parse.quote(path)
self.allowance = allowance
def applies_to (self, path):

View file

@@ -18,15 +18,17 @@
Functions for parsing and matching URL strings.
"""
import re
import os
try:
import urlparse
except ImportError:
# Python 3
import re
try: # Python 3
from urllib import parse
from urllib import parse as urlparse
import urllib
except ImportError: # Python 2
import urllib as parse
import urlparse
import requests
from . import log, LOG_CHECK
for scheme in ('ldap', 'irc'):
@@ -162,9 +164,9 @@ def parse_qsl (qs, keep_blank_values=0, strict_parsing=0):
else:
continue
if nv[1] or keep_blank_values:
name = urllib.unquote(nv[0].replace('+', ' '))
name = parse.unquote(nv[0].replace('+', ' '))
if nv[1]:
value = urllib.unquote(nv[1].replace('+', ' '))
value = parse.unquote(nv[1].replace('+', ' '))
else:
value = nv[1]
r.append((name, value, sep))
@@ -189,12 +191,12 @@ def idna_encode (host):
def url_fix_host (urlparts):
"""Unquote and fix hostname. Returns is_idn."""
if not urlparts[1]:
urlparts[2] = urllib.unquote(urlparts[2])
urlparts[2] = parse.unquote(urlparts[2])
return False
userpass, netloc = urllib.splituser(urlparts[1])
userpass, netloc = parse.splituser(urlparts[1])
if userpass:
userpass = urllib.unquote(userpass)
netloc, is_idn = idna_encode(urllib.unquote(netloc).lower())
userpass = parse.unquote(userpass)
netloc, is_idn = idna_encode(parse.unquote(netloc).lower())
# a leading backslash in path causes urlsplit() to add the
# path components up to the first slash to host
# try to find this case...
@@ -205,7 +207,7 @@ def url_fix_host (urlparts):
if not urlparts[2] or urlparts[2] == '/':
urlparts[2] = comps
else:
urlparts[2] = "%s%s" % (comps, urllib.unquote(urlparts[2]))
urlparts[2] = "%s%s" % (comps, parse.unquote(urlparts[2]))
netloc = netloc[:i]
else:
# a leading ? in path causes urlsplit() to add the query to the
@@ -214,7 +216,7 @@ def url_fix_host (urlparts):
if i != -1:
netloc, urlparts[3] = netloc.split('?', 1)
# path
urlparts[2] = urllib.unquote(urlparts[2])
urlparts[2] = parse.unquote(urlparts[2])
if userpass:
# append AT for easy concatenation
userpass += "@"
@@ -311,7 +313,7 @@ def url_norm (url, encoding=None):
encode_unicode = False
urlparts = list(urlparse.urlsplit(url))
# scheme
urlparts[0] = urllib.unquote(urlparts[0]).lower()
urlparts[0] = parse.unquote(urlparts[0]).lower()
# mailto: urlsplit is broken
if urlparts[0] == 'mailto':
url_fix_mailto_urlsplit(urlparts)
@@ -331,7 +333,7 @@ def url_norm (url, encoding=None):
# fix redundant path parts
urlparts[2] = collapse_segments(urlparts[2])
# anchor
urlparts[4] = urllib.unquote(urlparts[4])
urlparts[4] = parse.unquote(urlparts[4])
# quote parts again
urlparts[0] = url_quote_part(urlparts[0], encoding=encoding) # scheme
urlparts[1] = url_quote_part(urlparts[1], safechars='@:', encoding=encoding) # host
@@ -418,11 +420,11 @@ def url_quote_part (s, safechars='/', encoding=None):
if encoding is None:
encoding = url_encoding
s = s.encode(encoding, 'ignore')
return urllib.quote(s, safechars)
return parse.quote(s, safechars)
def document_quote (document):
"""Quote given document."""
doc, query = urllib.splitquery(document)
doc, query = parse.splitquery(document)
doc = url_quote_part(doc, '/=,')
if query:
return "%s?%s" % (doc, query)
@@ -473,8 +475,8 @@ def url_split (url):
hostname is always lowercased.
Precondition: url is syntactically correct URI (eg has no whitespace)
"""
scheme, netloc = urllib.splittype(url)
host, document = urllib.splithost(netloc)
scheme, netloc = parse.splittype(url)
host, document = parse.splithost(netloc)
port = default_ports.get(scheme, 0)
if host:
host = host.lower()

View file

@@ -14,6 +14,8 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
from __future__ import print_function
import signal
import subprocess
import os
@@ -288,9 +290,9 @@ def get_file (filename=None):
if __name__ == '__main__':
print "has clamav", has_clamav()
print "has network", has_network()
print "has msgfmt", has_msgfmt()
print "has POSIX", has_posix()
print "has proxy", has_proxy()
print "has X11", has_x11()
print("has clamav", has_clamav())
print("has network", has_network())
print("has msgfmt", has_msgfmt())
print("has POSIX", has_posix())
print("has proxy", has_proxy())
print("has X11", has_x11())

View file

@@ -56,16 +56,13 @@ class TestGTranslator (unittest.TestCase):
def test_gtranslator (self):
"""Test all pofiles for GTranslator brokenness."""
for f in get_pofiles():
fd = file(f)
try:
with open(f, 'rb') as fd:
self.check_file(fd, f)
finally:
fd.close()
def check_file (self, fd, f):
"""Test for GTranslator broken syntax."""
for line in fd:
if line.strip().startswith("#"):
if line.strip().startswith(b"#"):
continue
self.assertFalse("\xc2\xb7" in line,
self.assertFalse(b"\xc2\xb7" in line,
"Broken GTranslator copy/paste in %r:\n%r" % (f, line))