Merge pull request #312 from cjmayo/notneeded

Revert Python 3 patches not needed after decode
2026-05-12 00:23:12 +00:00 · 2019-10-07 11:29:52 -04:00 · 2019-10-07 11:29:52 -04:00 · 5a43cfec40
commit 5a43cfec40
parent 07cf9c1c11 5732606c58
7 changed files with 16 additions and 35 deletions
--- a/linkcheck/bookmarks/chromium.py
+++ b/linkcheck/bookmarks/chromium.py
@@ -20,8 +20,6 @@ import sys
 import json
 from xdg.BaseDirectory import xdg_config_home

-from .. import url as urlutil
-

 # Windows filename encoding
 nt_filename_encoding="mbcs"
@@ -71,7 +69,7 @@ def parse_bookmark_data (data):
    Return iterator for bookmarks of the form (url, name).
    Bookmarks are not sorted.
    """
-    for url, name in parse_bookmark_json(json.loads(urlutil.decode_for_unquote(data))):
+    for url, name in parse_bookmark_json(json.loads(data)):
        yield url, name


--- a/linkcheck/checker/__init__.py
+++ b/linkcheck/checker/__init__.py
@@ -58,11 +58,11 @@ def absolute_url (base_url, base_ref, parent_url):
    @param parent_url: url of parent document
    @type parent_url: string or None
    """
-    if base_url and urlutil.url_is_absolute(urlutil.decode_for_unquote(base_url)):
+    if base_url and urlutil.url_is_absolute(base_url):
        return base_url
-    elif base_ref and urlutil.url_is_absolute(urlutil.decode_for_unquote(base_ref)):
+    elif base_ref and urlutil.url_is_absolute(base_ref):
        return base_ref
-    elif parent_url and urlutil.url_is_absolute(urlutil.decode_for_unquote(parent_url)):
+    elif parent_url and urlutil.url_is_absolute(parent_url):
        return parent_url
    return u""

--- a/linkcheck/checker/fileurl.py
+++ b/linkcheck/checker/fileurl.py
@@ -20,7 +20,6 @@ Handle local file: links.

 import re
 import os
-import sys
 try:
    import urlparse
 except ImportError:
@@ -84,7 +83,7 @@ def get_os_filename (path):
    """Return filesystem path for given URL path."""
    if os.name == 'nt':
        path = prepare_urlpath_for_nt(path)
-    res = urlrequest.url2pathname(urlutil.decode_for_unquote(fileutil.pathencode(path)))
+    res = urlrequest.url2pathname(fileutil.pathencode(path))
    if os.name == 'nt' and res.endswith(':') and len(res) == 2:
        # Work around http://bugs.python.org/issue11474
        res += os.sep
@@ -193,10 +192,7 @@ class FileUrl (urlbase.UrlBase):
        if self.is_directory():
            self.set_result(_("directory"))
        else:
-            if sys.version_info.major < 3:
-                url = fileutil.pathencode(self.url)
-            else:
-                url = self.url
+            url = fileutil.pathencode(self.url)
            self.url_connection = urlopen(url)
            self.check_case_sensitivity()

--- a/linkcheck/checker/mailtourl.py
+++ b/linkcheck/checker/mailtourl.py
@@ -158,7 +158,7 @@ class MailtoUrl (urlbase.UrlBase):
            {"addr": mail}, valid=False, overwrite=False)
            return
        # note: be sure to use rsplit since "@" can occur in local part
-        local, domain = urlutil.decode_for_unquote(mail).rsplit("@", 1)
+        local, domain = mail.rsplit("@", 1)
        if not local:
            self.set_result(_("Missing local part of mail address `%(addr)s'.") % \
            {"addr": mail}, valid=False, overwrite=False)
@@ -247,7 +247,7 @@ class MailtoUrl (urlbase.UrlBase):
        from dns.exception import DNSException
        log.debug(LOG_CHECK, "checking mail address %r", mail)
        mail = strformat.ascii_safe(mail)
-        username, domain = urlutil.decode_for_unquote(mail).rsplit('@', 1)
+        username, domain = mail.rsplit('@', 1)
        log.debug(LOG_CHECK, "looking up MX mailhost %r", domain)
        try:
            answers = resolver.query(domain, 'MX')
--- a/linkcheck/htmlutil/linkname.py
+++ b/linkcheck/htmlutil/linkname.py
@@ -19,7 +19,7 @@ Parse names of title tags and link types.
 """

 import re
-from .. import HtmlParser, strformat, url as urlutil
+from .. import HtmlParser, strformat


 imgtag_re = re.compile(r"(?i)\s+alt\s*=\s*"+\
@@ -52,7 +52,6 @@ def image_name (txt):
 def href_name (txt):
    """Return the name part of the first <a href="">name</a> link in txt."""
    name = u""
-    txt = urlutil.decode_for_unquote(txt)
    endtag = a_end_search(txt)
    if not endtag:
        return name
--- a/linkcheck/htmlutil/linkparse.py
+++ b/linkcheck/htmlutil/linkparse.py
@@ -93,7 +93,6 @@ c_comment_re = re.compile(r"/\*.*?\*/", re.DOTALL)
 def strip_c_comments (text):
    """Remove C/CSS-style comments from text. Note that this method also
    deliberately removes comments inside of strings."""
-    text = urlutil.decode_for_unquote(text)
    return c_comment_re.sub('', text)


--- a/linkcheck/url.py
+++ b/linkcheck/url.py
@@ -92,16 +92,6 @@ is_safe_query = re.compile("(?i)^%s$" % _safe_query_pattern).match
 is_safe_fragment = re.compile("(?i)^%s$" % _safe_fragment_pattern).match


-def decode_for_unquote(part):
-    """
-    Decode string for unquote function
-    To string in Python 3, leave it in Python 2
-    """
-    if not isinstance(part, (str, str_text)):
-        # Python 3: we probably got bytes
-        part = part.decode("utf-8", "replace")
-    return part
-
 # snatched form urlparse.py
 def splitparams (path):
    """Split off parameter part from path.
@@ -202,9 +192,9 @@ def idna_encode (host):
 def url_fix_host (urlparts):
    """Unquote and fix hostname. Returns is_idn."""
    if not urlparts[1]:
-        urlparts[2] = parse.unquote(decode_for_unquote(urlparts[2]))
+        urlparts[2] = parse.unquote(urlparts[2])
        return False
-    userpass, netloc = parse.splituser(decode_for_unquote(urlparts[1]))
+    userpass, netloc = parse.splituser(urlparts[1])
    if userpass:
        userpass = parse.unquote(userpass)
    netloc, is_idn = idna_encode(parse.unquote(netloc).lower())
@@ -218,7 +208,7 @@ def url_fix_host (urlparts):
        if not urlparts[2] or urlparts[2] == '/':
            urlparts[2] = comps
        else:
-            urlparts[2] = "%s%s" % (comps, parse.unquote(decode_for_unquote(urlparts[2])))
+            urlparts[2] = "%s%s" % (comps, parse.unquote(urlparts[2]))
        netloc = netloc[:i]
    else:
        # a leading ? in path causes urlsplit() to add the query to the
@@ -227,7 +217,7 @@ def url_fix_host (urlparts):
        if i != -1:
            netloc, urlparts[3] = netloc.split('?', 1)
        # path
-        urlparts[2] = parse.unquote(decode_for_unquote(urlparts[2]))
+        urlparts[2] = parse.unquote(urlparts[2])
    if userpass:
        # append AT for easy concatenation
        userpass += "@"
@@ -272,7 +262,6 @@ def url_parse_query (query, encoding=None):
    """Parse and re-join the given CGI query."""
    # if ? is in the query, split it off, seen at msdn.microsoft.com
    append = ""
-    query = decode_for_unquote(query)
    while '?' in query:
        query, rest = query.rsplit('?', 1)
        append = '?'+url_parse_query(rest)+append
@@ -322,7 +311,7 @@ def url_norm (url, encoding=None):
        encode_unicode = False
    urlparts = list(urlparse.urlsplit(url))
    # scheme
-    urlparts[0] = parse.unquote(decode_for_unquote(urlparts[0])).lower()
+    urlparts[0] = parse.unquote(urlparts[0]).lower()
    # mailto: urlsplit is broken
    if urlparts[0] == 'mailto':
        url_fix_mailto_urlsplit(urlparts)
@@ -342,7 +331,7 @@ def url_norm (url, encoding=None):
            # fix redundant path parts
            urlparts[2] = collapse_segments(urlparts[2])
    # anchor
-    urlparts[4] = parse.unquote(decode_for_unquote(urlparts[4]))
+    urlparts[4] = parse.unquote(urlparts[4])
    # quote parts again
    urlparts[0] = url_quote_part(urlparts[0], encoding=encoding) # scheme
    urlparts[1] = url_quote_part(urlparts[1], safechars='@:', encoding=encoding) # host
@@ -351,7 +340,7 @@ def url_norm (url, encoding=None):
        urlparts[2] = url_fix_wayback_query(urlparts[2]) # unencode colon in http[s]:// in wayback path
    urlparts[4] = url_quote_part(urlparts[4], safechars="!$&'()*+,-./;=?@_~", encoding=encoding) # anchor
    res = urlunsplit(urlparts)
-    if decode_for_unquote(url).endswith('#') and not urlparts[4]:
+    if url.endswith('#') and not urlparts[4]:
        # re-append trailing empty fragment
        res += '#'
    if encode_unicode: