diff --git a/linkcheck/url.py b/linkcheck/url.py
index ee7463ba..ac69fa83 100644
--- a/linkcheck/url.py
+++ b/linkcheck/url.py
@@ -279,6 +279,9 @@ def url_norm (url, encoding=None):
url = url.encode('ascii')
except UnicodeEncodeError:
pass
+ encode_unicode = True
+ else:
+ encode_unicode = False
urlparts = list(urlparse.urlsplit(url))
# scheme
urlparts[0] = urllib.unquote(urlparts[0]).lower()
@@ -312,7 +315,7 @@ def url_norm (url, encoding=None):
if url.endswith('#') and not urlparts[4]:
# re-append trailing empty fragment
res += '#'
- if isinstance(url, unicode):
+ if encode_unicode:
res = unicode(res)
return (res, is_idn)
diff --git a/tests/checker/data/http.html.result b/tests/checker/data/http.html.result
index e85fef1f..58bd2bb4 100644
--- a/tests/checker/data/http.html.result
+++ b/tests/checker/data/http.html.result
@@ -121,11 +121,11 @@ real url http://localhost:8001/?d=directory&p=page1
name should not be cached
valid
-url http://localhost:8001/?quoted=ü
-cache key http://localhost:8001/?quoted=%%FC
-real url http://localhost:8001/?quoted=%%FC
+url http://localhost:8001/?quoted=ü
+cache key http://localhost:8001/?quoted=%%C3%%BC
+real url http://localhost:8001/?quoted=%%C3%%BC
name html entities
-warning Base URL is not properly normed. Normed URL is http://localhost:8001/?quoted=%%FC.
+warning Base URL is not properly normed. Normed URL is http://localhost:8001/?quoted=%%C3%%BC.
valid
url clsid:12345
diff --git a/tests/checker/data/misc.html b/tests/checker/data/misc.html
index df7c1a54..59417caa 100644
--- a/tests/checker/data/misc.html
+++ b/tests/checker/data/misc.html
@@ -3,9 +3,6 @@
-
-blubb
-
diff --git a/tests/checker/data/misc.html.result b/tests/checker/data/misc.html.result
index 5fc4a2fc..feec03f3 100644
--- a/tests/checker/data/misc.html.result
+++ b/tests/checker/data/misc.html.result
@@ -19,12 +19,6 @@ cache key file://%(curdir)s/%(datadir)s/favicon.ico
real url file://%(curdir)s/%(datadir)s/favicon.ico
valid
-url http://imadööfus.org%%0D%%3Cfont%%20face=%%22Verdana,%%20Arial,%%20Helvetica,%%20sans-serif%%22%%20size=%%222%%22%%3E%%3Chttp://www.imadoofus.org%%3E%%20%%0D%%20%%20%%20%%20%%20%%20%%20%%20%%20%%20%%20%%20%%20%%20
-cache key None
-real url
-name blubb
-error
-
url
cache key None
real url
diff --git a/tests/checker/test_error.py b/tests/checker/test_error.py
index 95cda92e..c0d74155 100644
--- a/tests/checker/test_error.py
+++ b/tests/checker/test_error.py
@@ -31,9 +31,10 @@ class TestError (LinkCheckTest):
attrs = self.get_attrs(url=url)
attrs['nurl'] = self.norm("file://%(curdir)s/%(url)s" % attrs)
resultlines = [
- u"url %(nurl)s" % attrs,
+ u"url file://%(curdir)s/%(url)s" % attrs,
u"cache key %(nurl)s" % attrs,
u"real url %(nurl)s" % attrs,
+ u"warning Base URL is not properly normed. Normed URL is %(nurl)s." % attrs,
u"error",
]
self.direct(url, resultlines)
@@ -44,10 +45,11 @@ class TestError (LinkCheckTest):
attrs = self.get_attrs(url=url)
attrs['nurl'] = self.norm("file://%(curdir)s/%(url)s" % attrs)
resultlines = [
- u"url %(nurl)s" % attrs,
+ u"url file://%(curdir)s/%(url)s" % attrs,
u"cache key %(nurl)s" % attrs,
u"real url %(nurl)s" % attrs,
u"name %(url)s" % attrs,
+ u"warning Base URL is not properly normed. Normed URL is %(nurl)s." % attrs,
u"error",
]
self.direct(url, resultlines)
@@ -55,10 +57,11 @@ class TestError (LinkCheckTest):
attrs = self.get_attrs(url=url)
attrs['nurl'] = self.norm("file://%(curdir)s/%(url)s" % attrs)
resultlines = [
- u"url %(nurl)s" % attrs,
+ u"url file://%(curdir)s/%(url)s" % attrs,
u"cache key %(nurl)s" % attrs,
u"real url %(nurl)s" % attrs,
u"name %(url)s" % attrs,
+ u"warning Base URL is not properly normed. Normed URL is %(nurl)s." % attrs,
u"error",
]
self.direct(url, resultlines)
@@ -92,10 +95,11 @@ class TestError (LinkCheckTest):
attrs = self.get_attrs(url=url)
attrs['nurl'] = self.norm("file://%(curdir)s/%(url)s" % attrs)
resultlines = [
- u"url %(nurl)s" % attrs,
+ u"url file://%(curdir)s/%(url)s" % attrs,
u"cache key %(nurl)s" % attrs,
u"real url %(nurl)s" % attrs,
u"name %(url)s" % attrs,
+ u"warning Base URL is not properly normed. Normed URL is %(nurl)s." % attrs,
u"error",
]
self.direct(url, resultlines)
@@ -106,10 +110,11 @@ class TestError (LinkCheckTest):
attrs = self.get_attrs(url=url)
attrs['nurl'] = self.norm("file://%(curdir)s/%(url)s" % attrs)
resultlines = [
- u"url %(nurl)s" % attrs,
+ u"url file://%(curdir)s/%(url)s" % attrs,
u"cache key %(nurl)s" % attrs,
u"real url %(nurl)s" % attrs,
u"name %(url)s" % attrs,
+ u"warning Base URL is not properly normed. Normed URL is %(nurl)s." % attrs,
u"error",
]
self.direct(url, resultlines)
@@ -120,10 +125,11 @@ class TestError (LinkCheckTest):
attrs = self.get_attrs(url=url)
attrs['nurl'] = self.norm("file://%(curdir)s/%(url)s" % attrs)
resultlines = [
- u"url %(nurl)s" % attrs,
+ u"url file://%(curdir)s/%(url)s" % attrs,
u"cache key %(nurl)s" % attrs,
u"real url %(nurl)s" % attrs,
u"name %(url)s" % attrs,
+ u"warning Base URL is not properly normed. Normed URL is %(nurl)s." % attrs,
u"error",
]
self.direct(url, resultlines)
diff --git a/tests/checker/test_http.py b/tests/checker/test_http.py
index 488c86f0..8e0dd04c 100644
--- a/tests/checker/test_http.py
+++ b/tests/checker/test_http.py
@@ -133,7 +133,6 @@ class TestHttp (httpserver.HttpServerTest):
u"url http://www.example.org/",
u"cache key http://www.example.org/",
u"real url http://www.example.org/",
- u"warning Access denied by robots.txt, checked only syntax.",
u"valid",
]
self.direct(url, resultlines, recursionlevel=1)