mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-05-05 05:04:46 +00:00
Replace deprecated urllib.parse.split functions
This commit is contained in:
parent
f99f15c349
commit
8779c39735
3 changed files with 37 additions and 21 deletions
|
|
@ -17,8 +17,6 @@
|
|||
Handle http links.
|
||||
"""
|
||||
|
||||
import urllib.parse
|
||||
|
||||
import requests
|
||||
|
||||
# The validity of SSL certs is ignored to be able
|
||||
|
|
@ -273,8 +271,7 @@ class HttpUrl(internpaturl.InternPatternUrl, proxysupport.ProxySupport):
|
|||
# Reset extern and recalculate
|
||||
self.extern = None
|
||||
self.set_extern(newurl)
|
||||
self.urlparts = list(urllib.parse.urlsplit(newurl))
|
||||
self.build_url_parts()
|
||||
self.urlparts = self.build_url_parts(newurl)
|
||||
self.url_connection = response
|
||||
self.headers = response.headers
|
||||
self.url = urlutil.urlunsplit(self.urlparts)
|
||||
|
|
|
|||
|
|
@ -436,27 +436,33 @@ class UrlBase:
|
|||
# restore second / in http[s]:// in wayback path
|
||||
urlparts[2] = url_fix_wayback_query(urlparts[2])
|
||||
self.url = urlutil.urlunsplit(urlparts)
|
||||
# split into (modifiable) list
|
||||
self.urlparts = list(urllib.parse.urlsplit(self.url))
|
||||
self.build_url_parts()
|
||||
self.urlparts = self.build_url_parts(self.url)
|
||||
# and unsplit again
|
||||
self.url = urlutil.urlunsplit(self.urlparts)
|
||||
|
||||
def build_url_parts(self):
|
||||
"""Set userinfo, host, port and anchor from self.urlparts.
|
||||
def build_url_parts(self, url):
|
||||
"""Set userinfo, host, port and anchor from url and return urlparts.
|
||||
Also checks for obfuscated IP addresses.
|
||||
"""
|
||||
split = urllib.parse.urlsplit(url)
|
||||
urlparts = list(split)
|
||||
# check userinfo@host:port syntax
|
||||
self.userinfo, host = urllib.parse.splituser(self.urlparts[1])
|
||||
port = urlutil.default_ports.get(self.scheme, 0)
|
||||
host, port = urlutil.splitport(host, port=port)
|
||||
self.userinfo, host = urlutil.split_netloc(split.netloc)
|
||||
try:
|
||||
port = split.port
|
||||
except ValueError:
|
||||
raise LinkCheckerError(
|
||||
_("URL host %(host)r has invalid port") % {"host": host}
|
||||
)
|
||||
if port is None:
|
||||
port = urlutil.default_ports.get(self.scheme, 0)
|
||||
if port is None:
|
||||
raise LinkCheckerError(
|
||||
_("URL host %(host)r has invalid port") % {"host": host}
|
||||
)
|
||||
self.port = port
|
||||
# set host lowercase
|
||||
self.host = host.lower()
|
||||
# urllib.parse.SplitResult.hostname is lowercase
|
||||
self.host = split.hostname
|
||||
if self.scheme in scheme_requires_host:
|
||||
if not self.host:
|
||||
raise LinkCheckerError(_("URL has empty hostname"))
|
||||
|
|
@ -466,13 +472,14 @@ class UrlBase:
|
|||
else:
|
||||
host = "%s:%d" % (self.host, self.port)
|
||||
if self.userinfo:
|
||||
self.urlparts[1] = "%s@%s" % (self.userinfo, host)
|
||||
urlparts[1] = "%s@%s" % (self.userinfo, host)
|
||||
else:
|
||||
self.urlparts[1] = host
|
||||
urlparts[1] = host
|
||||
# save anchor for later checking
|
||||
self.anchor = self.urlparts[4]
|
||||
self.anchor = split.fragment
|
||||
if self.anchor is not None:
|
||||
assert isinstance(self.anchor, str), repr(self.anchor)
|
||||
return urlparts
|
||||
|
||||
def check_obfuscated_ip(self):
|
||||
"""Warn if host of this URL is obfuscated IP address."""
|
||||
|
|
@ -745,7 +752,8 @@ class UrlBase:
|
|||
"""
|
||||
if self.userinfo:
|
||||
# URL itself has authentication info
|
||||
return urllib.parse.splitpasswd(self.userinfo)
|
||||
split = urllib.parse.urlsplit(self.url)
|
||||
return (split.username, split.password)
|
||||
return self.aggregate.config.get_user_password(self.url)
|
||||
|
||||
def add_url(self, url, line=0, column=0, page=0, name="", base=None):
|
||||
|
|
|
|||
|
|
@ -193,16 +193,24 @@ def idna_encode(host):
|
|||
return host, False
|
||||
|
||||
|
||||
def split_netloc(netloc):
    """Split a netloc string into its userinfo and host[:port] parts.

    Originated as urllib.parse._splituser(). The split is made at the
    last '@' in netloc, so the userinfo part may itself contain '@'.

    Returns a tuple (userinfo, hostport). userinfo is None when netloc
    contains no '@'; it is an empty string when netloc starts with '@'.
    """
    head, at_sign, tail = netloc.rpartition('@')
    if not at_sign:
        # No separator found: rpartition puts the whole input in the
        # last slot, so tail is the unchanged netloc.
        return None, tail
    return head, tail
|
||||
|
||||
|
||||
def url_fix_host(urlparts, encoding):
|
||||
"""Unquote and fix hostname. Returns is_idn."""
|
||||
if not urlparts[1]:
|
||||
urlparts[2] = urllib.parse.unquote(urlparts[2], encoding=encoding)
|
||||
return False
|
||||
userpass, netloc = urllib.parse.splituser(urlparts[1])
|
||||
userpass, hostport = split_netloc(urlparts[1])
|
||||
if userpass:
|
||||
userpass = urllib.parse.unquote(userpass, encoding=encoding)
|
||||
netloc, is_idn = idna_encode(
|
||||
urllib.parse.unquote(netloc, encoding=encoding).lower()
|
||||
urllib.parse.unquote(hostport, encoding=encoding).lower()
|
||||
)
|
||||
# a leading backslash in path causes urlsplit() to add the
|
||||
# path components up to the first slash to host
|
||||
|
|
@ -419,7 +427,10 @@ def url_quote(url, encoding):
|
|||
|
||||
def document_quote(document):
|
||||
"""Quote given document."""
|
||||
doc, query = urllib.parse.splitquery(document)
|
||||
doc, delim, query = document.rpartition('?')
|
||||
if not delim:
|
||||
doc = document
|
||||
query = None
|
||||
doc = urllib.parse.quote(doc, safe='/=,')
|
||||
if query:
|
||||
return "%s?%s" % (doc, query)
|
||||
|
|
|
|||
Loading…
Reference in a new issue