From c97f68f70ac9966f1cd902a5884f04b55a66632e Mon Sep 17 00:00:00 2001 From: calvin Date: Tue, 9 Nov 2004 00:00:59 +0000 Subject: [PATCH] accept unicode in robots.txt can_fetch git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1924 e7d03fd6-7b0d-0410-9947-9c21f3af8025 --- linkcheck/robotparser2.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/linkcheck/robotparser2.py b/linkcheck/robotparser2.py index 5ccb3eb1..d9780918 100755 --- a/linkcheck/robotparser2.py +++ b/linkcheck/robotparser2.py @@ -224,6 +224,10 @@ class RobotFileParser (object): """using the parsed robots.txt decide if useragent can fetch url""" debug("Checking robot.txt allowance for:\n"\ " user agent: %r\n url: %r" % (useragent, url)) + if not isinstance(useragent, str): + useragent = useragent.encode("ascii", "ignore") + if not isinstance(url, str): + url = url.encode("ascii", "ignore") if self.disallow_all: return False if self.allow_all: