mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-25 08:34:43 +00:00
accept unicode in robots.txt can_fetch
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1924 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
0113eb9fcb
commit
c97f68f70a
1 changed file with 4 additions and 0 deletions
|
|
@@ -224,6 +224,10 @@ class RobotFileParser (object):
|
||||||
"""using the parsed robots.txt decide if useragent can fetch url"""
|
"""using the parsed robots.txt decide if useragent can fetch url"""
|
||||||
debug("Checking robot.txt allowance for:\n"\
|
debug("Checking robot.txt allowance for:\n"\
|
||||||
" user agent: %r\n url: %r" % (useragent, url))
|
" user agent: %r\n url: %r" % (useragent, url))
|
||||||
|
if not isinstance(useragent, str):
|
||||||
|
useragent = useragent.encode("ascii", "ignore")
|
||||||
|
if not isinstance(url, str):
|
||||||
|
url = url.encode("ascii", "ignore")
|
||||||
if self.disallow_all:
|
if self.disallow_all:
|
||||||
return False
|
return False
|
||||||
if self.allow_all:
|
if self.allow_all:
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue