From 3a6540bc46e1a32353f1bda227f9b4f01970b752 Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Thu, 18 Jun 2020 19:27:06 +0100 Subject: [PATCH 1/7] Replace isinstance() in strformat.ascii_safe() --- linkcheck/strformat.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/linkcheck/strformat.py b/linkcheck/strformat.py index fa12f610..f25c55aa 100644 --- a/linkcheck/strformat.py +++ b/linkcheck/strformat.py @@ -62,12 +62,12 @@ def ascii_safe(s): """Get ASCII string without raising encoding errors. Unknown characters of the given encoding will be ignored. - @param s: the Unicode string to be encoded - @type s: unicode or None - @return: encoded ASCII version of s, or None if s was None - @rtype: string + @param s: the string to be encoded + @type s: string or None + @return: version of s containing only ASCII characters, or None if s was None + @rtype: string or None """ - if isinstance(s, str): + if s is not None: s = s.encode('ascii', 'ignore').decode('ascii') return s From 9c9a3d8b144007412e41aeb8fe3a74a85e5fa7c9 Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Thu, 18 Jun 2020 19:27:06 +0100 Subject: [PATCH 2/7] Remove isinstance() from url.idna_encode() Was originally used for Python 2 Unicode strings. f4b73c6d ("Python3: fix unicode in url.py", 2018-01-05) --- linkcheck/url.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linkcheck/url.py b/linkcheck/url.py index 13ae3f91..7340e27c 100644 --- a/linkcheck/url.py +++ b/linkcheck/url.py @@ -183,7 +183,7 @@ def idna_encode(host): to RFC 3490. @raise: UnicodeError if hostname is not properly IDN encoded. """ - if host and isinstance(host, str): + if host: try: host.encode('ascii') return host, False From 323173036664c78d679559afd73425833e78fab8 Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Thu, 18 Jun 2020 19:27:06 +0100 Subject: [PATCH 3/7] Remove isinstance() from robotparser2.py Originally for encoding Python 2 Unicode strings [1]. 
Will not be used in Python 3 because the variables are strings; if they were bytes, exceptions would be raised. [1] c97f68f7 ("accept unicode in robots.txt can_fetch", 2004-11-09) --- linkcheck/robotparser2.py | 4 ---- 1 file changed, 4 deletions(-) diff --git a/linkcheck/robotparser2.py b/linkcheck/robotparser2.py index d4ca6ee2..1a5af387 100644 --- a/linkcheck/robotparser2.py +++ b/linkcheck/robotparser2.py @@ -276,10 +276,6 @@ class RobotFileParser: useragent, url, ) - if not isinstance(useragent, str): - useragent = useragent.encode("ascii", "ignore") - if not isinstance(url, str): - url = url.encode("ascii", "ignore") if self.disallow_all: log.debug(LOG_CHECK, " ... disallow all.") return False From f86e506de449bd10860017724cbaa1689b3aa42d Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Thu, 18 Jun 2020 19:27:06 +0100 Subject: [PATCH 4/7] Remove isinstance() from FileUrl.read_content() get_index_html() returns a string. --- linkcheck/checker/fileurl.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/linkcheck/checker/fileurl.py b/linkcheck/checker/fileurl.py index 6e0753e2..8f0134e3 100644 --- a/linkcheck/checker/fileurl.py +++ b/linkcheck/checker/fileurl.py @@ -234,8 +234,7 @@ class FileUrl(urlbase.UrlBase): with links to the files.""" if self.is_directory(): data = get_index_html(get_files(self.get_os_filename())) - if isinstance(data, str): - data = data.encode("iso8859-1", "ignore") + data = data.encode("iso8859-1", "ignore") else: data = super().read_content() return data From 8f9f687ed8544bc6e202b89f1a3e3a47307acf9e Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Thu, 18 Jun 2020 19:27:06 +0100 Subject: [PATCH 5/7] Remove isinstance() from fileutil.path_safe() paths are derived from urls which are strings.
--- linkcheck/fileutil.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linkcheck/fileutil.py b/linkcheck/fileutil.py index 5c8c8948..cc39342d 100644 --- a/linkcheck/fileutil.py +++ b/linkcheck/fileutil.py @@ -70,7 +70,7 @@ else: def path_safe(path): """Ensure path string is compatible with the platform file system encoding.""" - if isinstance(path, str) and not os.path.supports_unicode_filenames: + if path and not os.path.supports_unicode_filenames: path = path.encode(FSCODING, "replace").decode(FSCODING) return path From 1f77506c9f37272b08cdda776dd96f370e073c17 Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Thu, 18 Jun 2020 19:27:06 +0100 Subject: [PATCH 6/7] Remove isinstance() in url.url_fix_mailto_urlsplit() urls are strings. --- linkcheck/url.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linkcheck/url.py b/linkcheck/url.py index 7340e27c..eb8fc171 100644 --- a/linkcheck/url.py +++ b/linkcheck/url.py @@ -255,7 +255,7 @@ def url_fix_common_typos(url): def url_fix_mailto_urlsplit(urlparts): """Split query part of mailto url if found.""" - sep = b"?" if isinstance(urlparts[2], bytes) else "?" + sep = "?" if sep in urlparts[2]: urlparts[2], urlparts[3] = urlparts[2].split(sep, 1) From 7a0644a234798afe5dfcce681328ce0775216e7d Mon Sep 17 00:00:00 2001 From: Chris Mayo Date: Wed, 8 Jul 2020 19:47:59 +0100 Subject: [PATCH 7/7] No need to process an empty string in strformat.ascii_safe() --- linkcheck/strformat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/linkcheck/strformat.py b/linkcheck/strformat.py index f25c55aa..a028844b 100644 --- a/linkcheck/strformat.py +++ b/linkcheck/strformat.py @@ -67,7 +67,7 @@ def ascii_safe(s): @return: version of s containing only ASCII characters, or None if s was None @rtype: string or None """ - if s is not None: + if s: s = s.encode('ascii', 'ignore').decode('ascii') return s