Merge pull request #382 from cjmayo/tidyten5

Make urllib imports and html.escape Python 3 only
Chris Mayo 2020-05-15 19:15:47 +01:00 committed by GitHub
commit f3eb787014
16 changed files with 103 additions and 181 deletions
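
Every file below follows the same recipe: delete the Python 2/3 dual-import guard, import the Python 3 module directly, and spell out the fully qualified name at each call site. A minimal before/after sketch of the pattern (a generic illustration, not a verbatim excerpt from any one file in this PR):

    # Before: guarded import so the same module ran on Python 2 and 3
    try:  # Python 3
        from urllib import parse as urlparse
    except ImportError:  # Python 2
        import urllib as urlparse

    # After: Python 3 only, with fully qualified call sites
    import html
    import urllib.parse

    quoted = urllib.parse.quote("a b")   # 'a%20b'
    escaped = html.escape("<a>")         # '&lt;a&gt;'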


@@ -18,11 +18,9 @@ Main functions for link checking.
 """
 import os
-from html import escape as html_escape
-try: # Python 3
-    from urllib import parse as urlparse
-except ImportError:
-    import urllib as urlparse
+import html
+import urllib.parse

 from .. import strformat, url as urlutil, log, LOG_CHECK

 MAX_FILESIZE = 1024*1024*10 # 10MB
@@ -165,9 +163,9 @@ def get_index_html (urls):
     """
     lines = ["<html>", "<body>"]
     for entry in urls:
-        name = html_escape(entry)
+        name = html.escape(entry)
         try:
-            url = html_escape(urlparse.quote(entry))
+            url = html.escape(urllib.parse.quote(entry))
         except KeyError:
             # Some unicode entries raise KeyError.
             url = name


@@ -19,20 +19,8 @@ Handle local file: links.
 import re
 import os
-try:
-    import urlparse
-except ImportError:
-    # Python 3
-    from urllib import parse as urlparse
-try: # Python 3
-    from urllib import request as urlrequest
-except ImportError:
-    import urllib as urlrequest
-try:
-    from urllib2 import urlopen
-except ImportError:
-    # Python 3
-    from urllib.request import urlopen
+import urllib.parse
+import urllib.request
 from builtins import str as str_text
 from datetime import datetime
@@ -82,7 +70,7 @@ def get_os_filename (path):
     """Return filesystem path for given URL path."""
     if os.name == 'nt':
         path = prepare_urlpath_for_nt(path)
-    res = urlrequest.url2pathname(fileutil.path_safe(path))
+    res = urllib.request.url2pathname(fileutil.path_safe(path))
     if os.name == 'nt' and res.endswith(':') and len(res) == 2:
         # Work around http://bugs.python.org/issue11474
         res += os.sep
@@ -153,7 +141,7 @@ class FileUrl (urlbase.UrlBase):
         from .urlbase import url_norm
         # norm base url - can raise UnicodeError from url.idna_encode()
         base_url, is_idn = url_norm(self.base_url, self.encoding)
-        urlparts = list(urlparse.urlsplit(base_url))
+        urlparts = list(urllib.parse.urlsplit(base_url))
         # ignore query part for filesystem urls
         urlparts[3] = ''
         self.base_url = urlutil.urlunsplit(urlparts)
@@ -189,7 +177,7 @@ class FileUrl (urlbase.UrlBase):
             self.set_result(_("directory"))
         else:
             url = fileutil.path_safe(self.url)
-            self.url_connection = urlopen(url)
+            self.url_connection = urllib.request.urlopen(url)
         self.check_case_sensitivity()

     def check_case_sensitivity (self):


@@ -18,11 +18,7 @@ Handle for mailto: links.
 """
 import re
-try:
-    import urlparse
-except ImportError:
-    # Python 3
-    from urllib import parse as urlparse
+import urllib.parse
 from email._parseaddr import AddressList

 from . import urlbase
@@ -94,7 +90,7 @@ class MailtoUrl (urlbase.UrlBase):
         Stores parsed addresses in the self.addresses set.
         """
         # cut off leading mailto: and unquote
-        url = urlparse.unquote(self.base_url[7:], self.encoding)
+        url = urllib.parse.unquote(self.base_url[7:], self.encoding)
         # search for cc, bcc, to and store in headers
         mode = 0 # 0=default, 1=quote, 2=esc
         quote = None
@@ -118,11 +114,11 @@ class MailtoUrl (urlbase.UrlBase):
         if i < (len(url) - 1):
             self.addresses.update(getaddresses(url[:i]))
             try:
-                headers = urlparse.parse_qs(url[(i+1):], strict_parsing=True)
+                headers = urllib.parse.parse_qs(url[(i+1):], strict_parsing=True)
                 for key, vals in headers.items():
                     if key.lower() in EMAIL_CGI_ADDRESS:
                         # Only the first header value is added
-                        self.addresses.update(getaddresses(urlparse.unquote(vals[0], self.encoding)))
+                        self.addresses.update(getaddresses(urllib.parse.unquote(vals[0], self.encoding)))
                     if key.lower() == EMAIL_CGI_SUBJECT:
                         self.subject = vals[0]
             except ValueError as err:


@@ -16,14 +16,8 @@
 """
 Mixin class for URLs that can be fetched over a proxy.
 """
-try: # Python 3
-    from urllib import parse
-    from urllib import request
-    from urllib.parse import splitport
-except ImportError:
-    from urllib import splitport
-    import urllib as request
-    import urlparse as parse
+import urllib.parse
+import urllib.request
 import os

 from .. import LinkCheckerError, log, LOG_CHECK, url as urlutil, httputil
@@ -41,7 +35,7 @@ class ProxySupport:
         self.proxyauth = None
         if not self.proxy:
             return
-        proxyurl = parse.urlparse(self.proxy)
+        proxyurl = urllib.parse.urlparse(self.proxy)
         self.proxytype = proxyurl.scheme
         if self.proxytype not in ('http', 'https'):
             # Note that invalid proxies might raise TypeError in urllib2,
@@ -68,11 +62,11 @@ class ProxySupport:

     def ignore_proxy_host (self):
         """Check if self.host is in the $no_proxy ignore list."""
-        if request.proxy_bypass(self.host):
+        if urllib.request.proxy_bypass(self.host):
             return True
         no_proxy = os.environ.get("no_proxy")
         if no_proxy:
-            entries = [parse_host_port(x) for x in no_proxy.split(",")]
+            entries = [urlutil.splitport(x.strip()) for x in no_proxy.split(",")]
             for host, port in entries:
                 if host.lower() == self.host and port == self.port:
                     return True
@@ -93,12 +87,3 @@ class ProxySupport:
         host = self.host
         port = self.port
         return (scheme, host, port)
-
-
-def parse_host_port (host_port):
-    """Parse a host:port string into separate components."""
-    host, port = splitport(host_port.strip())
-    if port is not None:
-        if urlutil.is_numeric_port(port):
-            port = int(port)
-    return host, port
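
Besides the import cleanup, this hunk deletes the module-local parse_host_port helper: no_proxy entries are now stripped inline and split with the project's own urlutil.splitport, presumably folding the numeric-port conversion into that function. A rough stdlib-only sketch of the same parsing (the function name and structure here are illustrative, not from the PR):

    import os

    def no_proxy_entries():
        """Split $no_proxy into (host, port) pairs, port as int when numeric."""
        entries = []
        for item in os.environ.get("no_proxy", "").split(","):
            host, _, port = item.strip().partition(":")
            entries.append((host.lower(), int(port) if port.isdigit() else None))
        return entries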


@@ -18,21 +18,8 @@ Base URL handler.
 """
 import sys
 import os
-try:
-    import urlparse
-except ImportError:
-    # Python 3
-    from urllib import parse as urlparse
-try: # Python 3
-    from urllib import parse as urllib_parse
-except ImportError:
-    import urllib as urllib_parse
-try:
-    from urllib2 import urlopen
-except ImportError:
-    # Python 3
-    from urllib.request import urlopen
+import urllib
+import urllib.parse
+from urllib.request import urlopen
 import time
 import errno
 import socket
@@ -66,7 +53,7 @@ def urljoin (parent, url):
     """
     if urlutil.url_is_absolute(url):
         return url
-    return urlparse.urljoin(parent, url)
+    return urllib.parse.urljoin(parent, url)


 def url_norm (url, encoding):
@@ -372,14 +359,14 @@ class UrlBase:
             self.url = urljoin(self.base_ref, base_url)
         elif self.parent_url:
             # strip the parent url query and anchor
-            urlparts = list(urlparse.urlsplit(self.parent_url))
+            urlparts = list(urllib.parse.urlsplit(self.parent_url))
             urlparts[4] = ""
             parent_url = urlutil.urlunsplit(urlparts)
             self.url = urljoin(parent_url, base_url)
         else:
             self.url = base_url
         # urljoin can unnorm the url path, so norm it again
-        urlparts = list(urlparse.urlsplit(self.url))
+        urlparts = list(urllib.parse.urlsplit(self.url))
         if urlparts[2]:
             urlparts[2] = urlutil.collapse_segments(urlparts[2])
         if not urlparts[0].startswith("feed"):
@@ -396,7 +383,7 @@ class UrlBase:
         Also checks for obfuscated IP addresses.
         """
         # check userinfo@host:port syntax
-        self.userinfo, host = urllib_parse.splituser(self.urlparts[1])
+        self.userinfo, host = urllib.parse.splituser(self.urlparts[1])
         port = urlutil.default_ports.get(self.scheme, 0)
         host, port = urlutil.splitport(host, port=port)
         if port is None:
@@ -676,7 +663,7 @@ class UrlBase:
         """
         if self.userinfo:
            # URL itself has authentication info
-            return urllib_parse.splitpasswd(self.userinfo)
+            return urllib.parse.splitpasswd(self.userinfo)
         return self.aggregate.config.get_user_password(self.url)

     def add_url (self, url, line=0, column=0, page=0, name="", base=None):
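
Note that splituser and splitpasswd survive this port only because urllib.parse still ships them as undocumented legacy helpers; Python 3.8 deprecated that whole split* family. Should they disappear, the documented SplitResult attributes cover the same ground; a hedged sketch (the helper name is illustrative, not from the PR):

    from urllib.parse import urlsplit

    def credentials(url):
        """Return (username, password) from a URL, None for missing parts."""
        parts = urlsplit(url)
        return parts.username, parts.password

    # credentials("http://alice:secret@example.com/") -> ('alice', 'secret')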


@@ -20,12 +20,8 @@ Store metadata and options.
 from functools import lru_cache
 import os
 import re
-try: # Python 3
-    from urllib import parse
-    from urllib import request
-except ImportError: # Python 2
-    import urlparse as parse
-    import urllib as request
+import urllib.parse
+import urllib.request
 import shutil
 import socket
 import _LinkChecker_configdata as configdata
@@ -172,7 +168,7 @@ class Configuration (dict):
         self["maxrequestspersecond"] = 10
         self["maxhttpredirects"] = 10
         self["nntpserver"] = os.environ.get("NNTP_SERVER", None)
-        self["proxy"] = request.getproxies()
+        self["proxy"] = urllib.request.getproxies()
         self["sslverify"] = True
         self["threads"] = 10
         self["timeout"] = 60
@@ -317,7 +313,7 @@ class Configuration (dict):
         if not url.lower().startswith(("http:", "https:")):
             log.warn(LOG_CHECK, _("login URL is not a HTTP URL."))
             disable = True
-        urlparts = parse.urlsplit(url)
+        urlparts = urllib.parse.urlsplit(url)
         if not urlparts[0] or not urlparts[1] or not urlparts[2]:
             log.warn(LOG_CHECK, _("login URL is incomplete."))
             disable = True


@@ -24,10 +24,7 @@ except ImportError:
 import requests
 import time
-try: # Python 3
-    from urllib import parse
-except ImportError:
-    import urlparse as parse
+import urllib.parse
 import random

 from .. import log, LOG_CHECK, strformat, LinkCheckerError
 from ..decorators import synchronized
@@ -103,7 +100,7 @@ class Aggregate:
             form.data[cgipassword] = password
         for key, value in self.config["loginextrafields"].items():
             form.data[key] = value
-        formurl = parse.urljoin(url, form.url)
+        formurl = urllib.parse.urljoin(url, form.url)
         log.debug(LOG_CHECK, "Posting login data to %s", formurl)
         response = session.post(formurl, data=form.data)
         response.raise_for_status()


@@ -17,17 +17,14 @@
 Functions used by the WSGI script.
 """
-from html import escape as html_escape
+import html
 import os
 import threading
 import locale
 import re
 import time
-try:
-    import urlparse
-except ImportError:
-    # Python 3
-    from urllib import parse as urlparse
+import urllib.parse
 from . import configuration, strformat, checker, director, get_link_pat, \
     init_i18n, url as urlutil
 from .decorators import synchronized
@@ -54,7 +51,7 @@ def application(environ, start_response):
         request_body = environ['wsgi.input'].read(request_body_size)
     else:
         request_body = environ['wsgi.input'].read()
-    form = urlparse.parse_qs(request_body.decode(HTML_ENCODING))
+    form = urllib.parse.parse_qs(request_body.decode(HTML_ENCODING))

     status = '200 OK'
     start_response(status, get_response_headers())
@@ -188,7 +185,7 @@ def get_configuration(form, out):

 def get_host_name (form):
     """Return host name of given URL."""
-    return urlparse.urlparse(formvalue(form, "url"))[1]
+    return urllib.parse.urlparse(formvalue(form, "url"))[1]


 def checkform (form, env):
@@ -264,4 +261,4 @@ contains only these characters: <code>A-Za-z0-9./_~-</code><br/><br/>
 Errors are logged.
 </blockquote>
 </body>
-</html>""") % html_escape(why)
+</html>""") % html.escape(why)
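
For reference, the Python 3 urllib.parse.parse_qs used here maps each decoded form field to a list of values:

    from urllib.parse import parse_qs

    parse_qs("url=http%3A%2F%2Fexample.com%2F&level=0")
    # {'url': ['http://example.com/'], 'level': ['0']}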


@@ -17,7 +17,7 @@
 A HTML logger.
 """
-from html import escape as html_escape
+import html
 import os
 import time
@@ -174,12 +174,12 @@ class HtmlLogger (_Logger):
         self.writeln("<tr>")
         self.writeln('<td class="url">%s</td>' % self.part("url"))
         self.write('<td class="url">')
-        self.write("`%s'" % html_escape(url_data.base_url))
+        self.write("`%s'" % html.escape(url_data.base_url))
         self.writeln("</td></tr>")

     def write_name (self, url_data):
         """Write url_data.name."""
-        args = (self.part("name"), html_escape(url_data.name))
+        args = (self.part("name"), html.escape(url_data.name))
         self.writeln("<tr><td>%s</td><td>`%s'</td></tr>" % args)

     def write_parent (self, url_data):
@@ -187,7 +187,7 @@ class HtmlLogger (_Logger):
         self.write("<tr><td>"+self.part("parenturl")+
                    '</td><td><a target="top" href="'+
                    url_data.parent_url+'">'+
-                   html_escape(url_data.parent_url)+"</a>")
+                   html.escape(url_data.parent_url)+"</a>")
         if url_data.line is not None:
             self.write(_(", line %d") % url_data.line)
         if url_data.column is not None:
@@ -206,13 +206,13 @@ class HtmlLogger (_Logger):
     def write_base (self, url_data):
         """Write url_data.base_ref."""
         self.writeln("<tr><td>"+self.part("base")+"</td><td>"+
-                     html_escape(url_data.base_ref)+"</td></tr>")
+                     html.escape(url_data.base_ref)+"</td></tr>")

     def write_real (self, url_data):
         """Write url_data.url."""
         self.writeln("<tr><td>"+self.part("realurl")+"</td><td>"+
                      '<a target="top" href="'+url_data.url+
-                     '">'+html_escape(url_data.url)+"</a></td></tr>")
+                     '">'+html.escape(url_data.url)+"</a></td></tr>")

     def write_dltime (self, url_data):
         """Write url_data.dltime."""
@@ -234,20 +234,20 @@ class HtmlLogger (_Logger):
     def write_info (self, url_data):
         """Write url_data.info."""
         sep = "<br/>"+os.linesep
-        text = sep.join(html_escape(x) for x in url_data.info)
+        text = sep.join(html.escape(x) for x in url_data.info)
         self.writeln('<tr><td valign="top">' + self.part("info")+
                      "</td><td>"+text+"</td></tr>")

     def write_modified(self, url_data):
         """Write url_data.modified."""
-        text = html_escape(self.format_modified(url_data.modified))
+        text = html.escape(self.format_modified(url_data.modified))
         self.writeln('<tr><td valign="top">' + self.part("modified") +
                      "</td><td>"+text+"</td></tr>")

     def write_warning (self, url_data):
         """Write url_data.warnings."""
         sep = "<br/>"+os.linesep
-        text = sep.join(html_escape(x[1]) for x in url_data.warnings)
+        text = sep.join(html.escape(x[1]) for x in url_data.warnings)
         self.writeln('<tr><td class="warning" '+
                      'valign="top">' + self.part("warning") +
                      '</td><td class="warning">' + text + "</td></tr>")
@@ -258,14 +258,14 @@ class HtmlLogger (_Logger):
             self.write('<tr><td class="valid">')
             self.write(self.part("result"))
             self.write('</td><td class="valid">')
-            self.write(html_escape(_("Valid")))
+            self.write(html.escape(_("Valid")))
         else:
             self.write('<tr><td class="error">')
             self.write(self.part("result"))
             self.write('</td><td class="error">')
-            self.write(html_escape(_("Error")))
+            self.write(html.escape(_("Error")))
         if url_data.result:
-            self.write(": "+html_escape(url_data.result))
+            self.write(": "+html.escape(url_data.result))
         self.writeln("</td></tr>")

     def write_stats (self):


@@ -16,7 +16,7 @@
 """
 Check HTML anchors
 """
-from urllib import parse
+import urllib.parse
 from . import _ContentPlugin
 from .. import log, LOG_PLUGIN
@@ -48,7 +48,7 @@ class AnchorCheck(_ContentPlugin):
         A warning is logged and True is returned if the anchor is not found.
         """
         log.debug(LOG_PLUGIN, "checking anchor %r in %s", url_data.anchor, self.anchors)
-        if any(x for x in self.anchors if parse.quote(x[0]) == url_data.anchor):
+        if any(x for x in self.anchors if urllib.parse.quote(x[0]) == url_data.anchor):
             return
         if self.anchors:
             anchornames = sorted(set("`%s'" % x[0] for x in self.anchors))


@@ -19,15 +19,8 @@ Robots.txt parser.
 The robots.txt Exclusion Protocol is implemented as specified in
 http://www.robotstxt.org/wc/norobots-rfc.html
 """
-try: # Python 3
-    from urllib import parse
-except ImportError: # Python 2
-    import urllib as parse
-try: # Python 3
-    from urllib.parse import urlparse
-except ImportError: # Python 2
-    from urlparse import urlparse
 import time
+import urllib.parse

 import requests
@@ -84,7 +77,7 @@ class RobotFileParser:
     def set_url (self, url):
         """Set the URL referring to a robots.txt file."""
         self.url = url
-        self.host, self.path = urlparse(url)[1:3]
+        self.host, self.path = urllib.parse.urlparse(url)[1:3]

     def read (self):
         """Read the robots.txt URL and feeds it to the parser."""
@@ -168,7 +161,7 @@ class RobotFileParser:
             line = line.split(':', 1)
             if len(line) == 2:
                 line[0] = line[0].strip().lower()
-                line[1] = parse.unquote(line[1].strip(), self.encoding)
+                line[1] = urllib.parse.unquote(line[1].strip(), self.encoding)
                 if line[0] == "user-agent":
                     if state == 2:
                         log.debug(LOG_CHECK, "%r line %d: missing blank line before user-agent directive", self.url, linenumber)
@@ -236,7 +229,7 @@ class RobotFileParser:
             return True
         # search for given user agent matches
         # the first match counts
-        url = parse.quote(urlparse(parse.unquote(url))[2]) or "/"
+        url = urllib.parse.quote(urllib.parse.urlparse(urllib.parse.unquote(url))[2]) or "/"
         for entry in self.entries:
             if entry.applies_to(useragent):
                 return entry.allowance(url)
@@ -282,7 +275,7 @@ class RuleLine:
             # an empty value means allow all
             allowance = True
             path = '/'
-        self.path = parse.quote(path)
+        self.path = urllib.parse.quote(path)
         self.allowance = allowance

     def applies_to (self, path):


@@ -32,11 +32,7 @@ import codecs
 import os
 import math
 import time
-try:
-    import urlparse
-except ImportError:
-    # Python 3
-    from urllib import parse as urlparse
+import urllib.parse
 import locale
 import pydoc
 from . import i18n
@@ -95,8 +91,8 @@ def is_encoding (text):

 def url_unicode_split (url):
-    """Like urlparse.urlsplit(), but always returning unicode parts."""
-    return [unicode_safe(s) for s in urlparse.urlsplit(url)]
+    """Like urllib.parse.urlsplit(), but always returning unicode parts."""
+    return [unicode_safe(s) for s in urllib.parse.urlsplit(url)]

 def unquote (s, matching=False):


@@ -19,12 +19,7 @@ Functions for parsing and matching URL strings.
 import os
 import re
-try: # Python 3
-    from urllib import parse
-    from urllib import parse as urlparse
-except ImportError: # Python 2
-    import urllib as parse
-    import urlparse
+import urllib.parse

 import requests
 from builtins import str as str_text
@@ -32,8 +27,8 @@ from builtins import str as str_text
 from . import log, LOG_CHECK

 for scheme in ('ldap', 'irc'):
-    if scheme not in urlparse.uses_netloc:
-        urlparse.uses_netloc.append(scheme)
+    if scheme not in urllib.parse.uses_netloc:
+        urllib.parse.uses_netloc.append(scheme)

 # The character set to encode non-ASCII characters in a URL. See also
 # http://tools.ietf.org/html/rfc2396#section-2.1
@@ -164,9 +159,9 @@ def parse_qsl (qs, encoding, keep_blank_values=0, strict_parsing=0):
         else:
             continue
         if nv[1] or keep_blank_values:
-            name = parse.unquote(nv[0].replace('+', ' '), encoding=encoding)
+            name = urllib.parse.unquote(nv[0].replace('+', ' '), encoding=encoding)
             if nv[1]:
-                value = parse.unquote(nv[1].replace('+', ' '), encoding=encoding)
+                value = urllib.parse.unquote(nv[1].replace('+', ' '), encoding=encoding)
             else:
                 value = nv[1]
             r.append((name, value, sep))
@@ -191,12 +186,12 @@ def idna_encode (host):
 def url_fix_host (urlparts, encoding):
     """Unquote and fix hostname. Returns is_idn."""
     if not urlparts[1]:
-        urlparts[2] = parse.unquote(urlparts[2], encoding=encoding)
+        urlparts[2] = urllib.parse.unquote(urlparts[2], encoding=encoding)
         return False
-    userpass, netloc = parse.splituser(urlparts[1])
+    userpass, netloc = urllib.parse.splituser(urlparts[1])
     if userpass:
-        userpass = parse.unquote(userpass, encoding=encoding)
-    netloc, is_idn = idna_encode(parse.unquote(netloc, encoding=encoding).lower())
+        userpass = urllib.parse.unquote(userpass, encoding=encoding)
+    netloc, is_idn = idna_encode(urllib.parse.unquote(netloc, encoding=encoding).lower())
     # a leading backslash in path causes urlsplit() to add the
     # path components up to the first slash to host
     # try to find this case...
@@ -207,7 +202,7 @@ def url_fix_host (urlparts, encoding):
         if not urlparts[2] or urlparts[2] == '/':
             urlparts[2] = comps
         else:
-            urlparts[2] = "%s%s" % (comps, parse.unquote(urlparts[2], encoding=encoding))
+            urlparts[2] = "%s%s" % (comps, urllib.parse.unquote(urlparts[2], encoding=encoding))
         netloc = netloc[:i]
     else:
         # a leading ? in path causes urlsplit() to add the query to the
@@ -216,7 +211,7 @@ def url_fix_host (urlparts, encoding):
         if i != -1:
             netloc, urlparts[3] = netloc.split('?', 1)
     # path
-    urlparts[2] = parse.unquote(urlparts[2], encoding=encoding)
+    urlparts[2] = urllib.parse.unquote(urlparts[2], encoding=encoding)
     if userpass:
         # append AT for easy concatenation
         userpass += "@"
@@ -266,9 +261,9 @@ def url_parse_query (query, encoding):
             append = '?'+url_parse_query(rest, encoding=encoding)+append
     l = []
     for k, v, sep in parse_qsl(query, keep_blank_values=True, encoding=encoding):
-        k = parse.quote(k, safe='/-:,;')
+        k = urllib.parse.quote(k, safe='/-:,;')
         if v:
-            v = parse.quote(v, safe='/-:,;')
+            v = urllib.parse.quote(v, safe='/-:,;')
             l.append("%s=%s%s" % (k, v, sep))
         elif v is None:
             l.append("%s%s" % (k, sep))
@@ -279,12 +274,12 @@ def url_parse_query (query, encoding):

 def urlunsplit (urlparts):
-    """Same as urlparse.urlunsplit but with extra UNC path handling
+    """Same as urllib.parse.urlunsplit but with extra UNC path handling
     for Windows OS."""
-    res = urlparse.urlunsplit(urlparts)
+    res = urllib.parse.urlunsplit(urlparts)
     if os.name == 'nt' and urlparts[0] == 'file' and '|' not in urlparts[2]:
         # UNC paths must have 4 slashes: 'file:////server/path'
-        # Depending on the path in urlparts[2], urlparse.urlunsplit()
+        # Depending on the path in urlparts[2], urllib.parse.urlunsplit()
         # left only two or three slashes. This is fixed below
         repl = 'file://' if urlparts[2].startswith('//') else 'file:/'
         res = res.replace('file:', repl)
@@ -298,9 +293,9 @@ def url_norm (url, encoding):
     @return: (normed url, idna flag)
     @rtype: tuple of length two
     """
-    urlparts = list(urlparse.urlsplit(url))
+    urlparts = list(urllib.parse.urlsplit(url))
     # scheme
-    urlparts[0] = parse.unquote(urlparts[0], encoding=encoding).lower()
+    urlparts[0] = urllib.parse.unquote(urlparts[0], encoding=encoding).lower()
     # mailto: urlsplit is broken
     if urlparts[0] == 'mailto':
         url_fix_mailto_urlsplit(urlparts)
@@ -308,7 +303,7 @@ def url_norm (url, encoding):
     is_idn = url_fix_host(urlparts, encoding)
     # query
     urlparts[3] = url_parse_query(urlparts[3], encoding=encoding)
-    if urlparts[0] in urlparse.uses_relative:
+    if urlparts[0] in urllib.parse.uses_relative:
         # URL has a hierarchical path we should norm
         if not urlparts[2]:
             # Empty path is allowed if both query and fragment are also empty.
@@ -320,14 +315,14 @@ def url_norm (url, encoding):
         # fix redundant path parts
         urlparts[2] = collapse_segments(urlparts[2])
     # anchor
-    urlparts[4] = parse.unquote(urlparts[4], encoding=encoding)
+    urlparts[4] = urllib.parse.unquote(urlparts[4], encoding=encoding)
     # quote parts again
-    urlparts[0] = parse.quote(urlparts[0]) # scheme
-    urlparts[1] = parse.quote(urlparts[1], safe='@:') # host
-    urlparts[2] = parse.quote(urlparts[2], safe=_nopathquote_chars) # path
+    urlparts[0] = urllib.parse.quote(urlparts[0]) # scheme
+    urlparts[1] = urllib.parse.quote(urlparts[1], safe='@:') # host
+    urlparts[2] = urllib.parse.quote(urlparts[2], safe=_nopathquote_chars) # path
     if not urlparts[0].startswith("feed"):
         urlparts[2] = url_fix_wayback_query(urlparts[2]) # unencode colon in http[s]:// in wayback path
-    urlparts[4] = parse.quote(urlparts[4], safe="!$&'()*+,-./;=?@_~") # anchor
+    urlparts[4] = urllib.parse.quote(urlparts[4], safe="!$&'()*+,-./;=?@_~") # anchor
     res = urlunsplit(urlparts)
     if url.endswith('#') and not urlparts[4]:
         # re-append trailing empty fragment
@@ -380,28 +375,28 @@ def url_quote (url, encoding):
     """Quote given URL."""
     if not url_is_absolute(url):
         return document_quote(url)
-    urlparts = list(urlparse.urlsplit(url))
-    urlparts[0] = parse.quote(urlparts[0]) # scheme
-    urlparts[1] = parse.quote(urlparts[1], safe=':') # host
-    urlparts[2] = parse.quote(urlparts[2], safe='/=,') # path
-    urlparts[3] = parse.quote(urlparts[3], safe='&=,') # query
+    urlparts = list(urllib.parse.urlsplit(url))
+    urlparts[0] = urllib.parse.quote(urlparts[0]) # scheme
+    urlparts[1] = urllib.parse.quote(urlparts[1], safe=':') # host
+    urlparts[2] = urllib.parse.quote(urlparts[2], safe='/=,') # path
+    urlparts[3] = urllib.parse.quote(urlparts[3], safe='&=,') # query
     l = []
     for k, v, sep in parse_qsl(urlparts[3], encoding=encoding, keep_blank_values=True): # query
-        k = parse.quote(k, safe='/-:,;')
+        k = urllib.parse.quote(k, safe='/-:,;')
         if v:
-            v = parse.quote(v, safe='/-:,;')
+            v = urllib.parse.quote(v, safe='/-:,;')
             l.append("%s=%s%s" % (k, v, sep))
         else:
             l.append("%s%s" % (k, sep))
     urlparts[3] = ''.join(l)
-    urlparts[4] = parse.quote(urlparts[4]) # anchor
+    urlparts[4] = urllib.parse.quote(urlparts[4]) # anchor
     return urlunsplit(urlparts)


 def document_quote (document):
     """Quote given document."""
-    doc, query = parse.splitquery(document)
-    doc = parse.quote(doc, safe='/=,')
+    doc, query = urllib.parse.splitquery(document)
+    doc = urllib.parse.quote(doc, safe='/=,')
     if query:
         return "%s?%s" % (doc, query)
     return doc
@@ -451,8 +446,8 @@ def url_split (url):
     hostname is always lowercased.
     Precondition: url is syntactically correct URI (eg has no whitespace)
     """
-    scheme, netloc = parse.splittype(url)
-    host, document = parse.splithost(netloc)
+    scheme, netloc = urllib.parse.splittype(url)
+    host, document = urllib.parse.splithost(netloc)
     port = default_ports.get(scheme, 0)
     if host:
         host = host.lower()
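
url.py leans hardest on the legacy helpers: splittype, splithost, splituser, splitquery, and splitport all still exist in Python 3's urllib.parse, but only as undocumented functions that Python 3.8 started deprecating. A hedged sketch of the documented urlsplit equivalent for the url_split case above (names are illustrative, not from the PR):

    from urllib.parse import urlsplit

    def split_url(url):
        """Documented-API stand-in for splittype/splithost/splitport."""
        parts = urlsplit(url)
        # hostname is already lowercased; port is an int, or None when absent
        return parts.scheme, parts.hostname, parts.port, parts.path

    # split_url("http://Example.COM:8080/docs") -> ('http', 'example.com', 8080, '/docs')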


@@ -20,7 +20,7 @@ Analyze a memory dump by the meliae module.
 import sys
 import os
 import codecs
-from html import escape as html_escape
+import html
 from linkcheck import strformat

 def main (filename):
@@ -107,7 +107,7 @@ def write_html_obj(fp, obj, objs):
     if obj.value is None:
         value = "None"
     else:
-        value = html_escape(str(obj.value))
+        value = html.escape(str(obj.value))
     attrs = dict(
         address=obj.address,
         size=strformat.strsize(obj.size),


@@ -17,17 +17,14 @@
 Define http test support classes for LinkChecker tests.
 """
-from html import escape as html_escape
+import html
 from http.server import CGIHTTPRequestHandler, SimpleHTTPRequestHandler, HTTPServer
 from http.client import HTTPConnection, HTTPSConnection
 import os.path
 import ssl
 import time
 import threading
-try:
-    from urllib import parse as urllib_parse
-except ImportError:
-    import urllib as urllib_parse
+import urllib.parse
 from io import BytesIO

 from . import LinkCheckTest
 from .. import get_file
@@ -136,7 +133,7 @@ class NoQueryHttpRequestHandler (StoppableHttpRequestHandler):
             displayname = linkname = name
             list_item = (
                 '<li><a href="%s">%s</a>\n'
-                % (urllib_parse.quote(linkname), html_escape(displayname))
+                % (urllib.parse.quote(linkname), html.escape(displayname))
             )
             f.write(list_item.encode())
         f.write(b"</ul>\n<hr>\n</body>\n</html>\n")


@@ -17,10 +17,7 @@
 Test cgi form routines.
 """
 import unittest
-try: # Python 3
-    from urllib import parse as urllib_parse
-except ImportError: # Python 2
-    import urllib as urllib_parse
+import urllib.parse
 from io import BytesIO
 from wsgiref.util import setup_testing_defaults
 from linkcheck.lc_cgi import checkform, checklink, LCFormError, application
@@ -59,7 +56,7 @@ class TestWsgi (unittest.TestCase):

     def test_application (self):
         form = dict(url="http://www.example.com/", level="0")
-        formdata = urllib_parse.urlencode(form)
+        formdata = urllib.parse.urlencode(form)
         formdata = formdata.encode('ascii')
         environ = {'wsgi.input': BytesIO(formdata)}
         setup_testing_defaults(environ)