mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-27 19:20:30 +00:00
Merge pull request #312 from cjmayo/notneeded
Revert Python 3 patches not needed after decode
This commit is contained in:
commit
5a43cfec40
7 changed files with 16 additions and 35 deletions
|
|
@ -20,8 +20,6 @@ import sys
|
|||
import json
|
||||
from xdg.BaseDirectory import xdg_config_home
|
||||
|
||||
from .. import url as urlutil
|
||||
|
||||
|
||||
# Windows filename encoding
|
||||
nt_filename_encoding="mbcs"
|
||||
|
|
@ -71,7 +69,7 @@ def parse_bookmark_data (data):
|
|||
Return iterator for bookmarks of the form (url, name).
|
||||
Bookmarks are not sorted.
|
||||
"""
|
||||
for url, name in parse_bookmark_json(json.loads(urlutil.decode_for_unquote(data))):
|
||||
for url, name in parse_bookmark_json(json.loads(data)):
|
||||
yield url, name
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -58,11 +58,11 @@ def absolute_url (base_url, base_ref, parent_url):
|
|||
@param parent_url: url of parent document
|
||||
@type parent_url: string or None
|
||||
"""
|
||||
if base_url and urlutil.url_is_absolute(urlutil.decode_for_unquote(base_url)):
|
||||
if base_url and urlutil.url_is_absolute(base_url):
|
||||
return base_url
|
||||
elif base_ref and urlutil.url_is_absolute(urlutil.decode_for_unquote(base_ref)):
|
||||
elif base_ref and urlutil.url_is_absolute(base_ref):
|
||||
return base_ref
|
||||
elif parent_url and urlutil.url_is_absolute(urlutil.decode_for_unquote(parent_url)):
|
||||
elif parent_url and urlutil.url_is_absolute(parent_url):
|
||||
return parent_url
|
||||
return u""
|
||||
|
||||
|
|
|
|||
|
|
@ -20,7 +20,6 @@ Handle local file: links.
|
|||
|
||||
import re
|
||||
import os
|
||||
import sys
|
||||
try:
|
||||
import urlparse
|
||||
except ImportError:
|
||||
|
|
@ -84,7 +83,7 @@ def get_os_filename (path):
|
|||
"""Return filesystem path for given URL path."""
|
||||
if os.name == 'nt':
|
||||
path = prepare_urlpath_for_nt(path)
|
||||
res = urlrequest.url2pathname(urlutil.decode_for_unquote(fileutil.pathencode(path)))
|
||||
res = urlrequest.url2pathname(fileutil.pathencode(path))
|
||||
if os.name == 'nt' and res.endswith(':') and len(res) == 2:
|
||||
# Work around http://bugs.python.org/issue11474
|
||||
res += os.sep
|
||||
|
|
@ -193,10 +192,7 @@ class FileUrl (urlbase.UrlBase):
|
|||
if self.is_directory():
|
||||
self.set_result(_("directory"))
|
||||
else:
|
||||
if sys.version_info.major < 3:
|
||||
url = fileutil.pathencode(self.url)
|
||||
else:
|
||||
url = self.url
|
||||
url = fileutil.pathencode(self.url)
|
||||
self.url_connection = urlopen(url)
|
||||
self.check_case_sensitivity()
|
||||
|
||||
|
|
|
|||
|
|
@ -158,7 +158,7 @@ class MailtoUrl (urlbase.UrlBase):
|
|||
{"addr": mail}, valid=False, overwrite=False)
|
||||
return
|
||||
# note: be sure to use rsplit since "@" can occur in local part
|
||||
local, domain = urlutil.decode_for_unquote(mail).rsplit("@", 1)
|
||||
local, domain = mail.rsplit("@", 1)
|
||||
if not local:
|
||||
self.set_result(_("Missing local part of mail address `%(addr)s'.") % \
|
||||
{"addr": mail}, valid=False, overwrite=False)
|
||||
|
|
@ -247,7 +247,7 @@ class MailtoUrl (urlbase.UrlBase):
|
|||
from dns.exception import DNSException
|
||||
log.debug(LOG_CHECK, "checking mail address %r", mail)
|
||||
mail = strformat.ascii_safe(mail)
|
||||
username, domain = urlutil.decode_for_unquote(mail).rsplit('@', 1)
|
||||
username, domain = mail.rsplit('@', 1)
|
||||
log.debug(LOG_CHECK, "looking up MX mailhost %r", domain)
|
||||
try:
|
||||
answers = resolver.query(domain, 'MX')
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ Parse names of title tags and link types.
|
|||
"""
|
||||
|
||||
import re
|
||||
from .. import HtmlParser, strformat, url as urlutil
|
||||
from .. import HtmlParser, strformat
|
||||
|
||||
|
||||
imgtag_re = re.compile(r"(?i)\s+alt\s*=\s*"+\
|
||||
|
|
@ -52,7 +52,6 @@ def image_name (txt):
|
|||
def href_name (txt):
|
||||
"""Return the name part of the first <a href="">name</a> link in txt."""
|
||||
name = u""
|
||||
txt = urlutil.decode_for_unquote(txt)
|
||||
endtag = a_end_search(txt)
|
||||
if not endtag:
|
||||
return name
|
||||
|
|
|
|||
|
|
@ -93,7 +93,6 @@ c_comment_re = re.compile(r"/\*.*?\*/", re.DOTALL)
|
|||
def strip_c_comments (text):
|
||||
"""Remove C/CSS-style comments from text. Note that this method also
|
||||
deliberately removes comments inside of strings."""
|
||||
text = urlutil.decode_for_unquote(text)
|
||||
return c_comment_re.sub('', text)
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -92,16 +92,6 @@ is_safe_query = re.compile("(?i)^%s$" % _safe_query_pattern).match
|
|||
is_safe_fragment = re.compile("(?i)^%s$" % _safe_fragment_pattern).match
|
||||
|
||||
|
||||
def decode_for_unquote(part):
|
||||
"""
|
||||
Decode string for unquote function
|
||||
To string in Python 3, leave it in Python 2
|
||||
"""
|
||||
if not isinstance(part, (str, str_text)):
|
||||
# Python 3: we probably got bytes
|
||||
part = part.decode("utf-8", "replace")
|
||||
return part
|
||||
|
||||
# snatched form urlparse.py
|
||||
def splitparams (path):
|
||||
"""Split off parameter part from path.
|
||||
|
|
@ -202,9 +192,9 @@ def idna_encode (host):
|
|||
def url_fix_host (urlparts):
|
||||
"""Unquote and fix hostname. Returns is_idn."""
|
||||
if not urlparts[1]:
|
||||
urlparts[2] = parse.unquote(decode_for_unquote(urlparts[2]))
|
||||
urlparts[2] = parse.unquote(urlparts[2])
|
||||
return False
|
||||
userpass, netloc = parse.splituser(decode_for_unquote(urlparts[1]))
|
||||
userpass, netloc = parse.splituser(urlparts[1])
|
||||
if userpass:
|
||||
userpass = parse.unquote(userpass)
|
||||
netloc, is_idn = idna_encode(parse.unquote(netloc).lower())
|
||||
|
|
@ -218,7 +208,7 @@ def url_fix_host (urlparts):
|
|||
if not urlparts[2] or urlparts[2] == '/':
|
||||
urlparts[2] = comps
|
||||
else:
|
||||
urlparts[2] = "%s%s" % (comps, parse.unquote(decode_for_unquote(urlparts[2])))
|
||||
urlparts[2] = "%s%s" % (comps, parse.unquote(urlparts[2]))
|
||||
netloc = netloc[:i]
|
||||
else:
|
||||
# a leading ? in path causes urlsplit() to add the query to the
|
||||
|
|
@ -227,7 +217,7 @@ def url_fix_host (urlparts):
|
|||
if i != -1:
|
||||
netloc, urlparts[3] = netloc.split('?', 1)
|
||||
# path
|
||||
urlparts[2] = parse.unquote(decode_for_unquote(urlparts[2]))
|
||||
urlparts[2] = parse.unquote(urlparts[2])
|
||||
if userpass:
|
||||
# append AT for easy concatenation
|
||||
userpass += "@"
|
||||
|
|
@ -272,7 +262,6 @@ def url_parse_query (query, encoding=None):
|
|||
"""Parse and re-join the given CGI query."""
|
||||
# if ? is in the query, split it off, seen at msdn.microsoft.com
|
||||
append = ""
|
||||
query = decode_for_unquote(query)
|
||||
while '?' in query:
|
||||
query, rest = query.rsplit('?', 1)
|
||||
append = '?'+url_parse_query(rest)+append
|
||||
|
|
@ -322,7 +311,7 @@ def url_norm (url, encoding=None):
|
|||
encode_unicode = False
|
||||
urlparts = list(urlparse.urlsplit(url))
|
||||
# scheme
|
||||
urlparts[0] = parse.unquote(decode_for_unquote(urlparts[0])).lower()
|
||||
urlparts[0] = parse.unquote(urlparts[0]).lower()
|
||||
# mailto: urlsplit is broken
|
||||
if urlparts[0] == 'mailto':
|
||||
url_fix_mailto_urlsplit(urlparts)
|
||||
|
|
@ -342,7 +331,7 @@ def url_norm (url, encoding=None):
|
|||
# fix redundant path parts
|
||||
urlparts[2] = collapse_segments(urlparts[2])
|
||||
# anchor
|
||||
urlparts[4] = parse.unquote(decode_for_unquote(urlparts[4]))
|
||||
urlparts[4] = parse.unquote(urlparts[4])
|
||||
# quote parts again
|
||||
urlparts[0] = url_quote_part(urlparts[0], encoding=encoding) # scheme
|
||||
urlparts[1] = url_quote_part(urlparts[1], safechars='@:', encoding=encoding) # host
|
||||
|
|
@ -351,7 +340,7 @@ def url_norm (url, encoding=None):
|
|||
urlparts[2] = url_fix_wayback_query(urlparts[2]) # unencode colon in http[s]:// in wayback path
|
||||
urlparts[4] = url_quote_part(urlparts[4], safechars="!$&'()*+,-./;=?@_~", encoding=encoding) # anchor
|
||||
res = urlunsplit(urlparts)
|
||||
if decode_for_unquote(url).endswith('#') and not urlparts[4]:
|
||||
if url.endswith('#') and not urlparts[4]:
|
||||
# re-append trailing empty fragment
|
||||
res += '#'
|
||||
if encode_unicode:
|
||||
|
|
|
|||
Loading…
Reference in a new issue