mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-03-17 06:20:27 +00:00
accept unicode in robots.txt can_fetch
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@1924 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
0113eb9fcb
commit
c97f68f70a
1 changed file with 4 additions and 0 deletions
|
|
@@ -224,6 +224,10 @@ class RobotFileParser (object):
|
|||
"""using the parsed robots.txt decide if useragent can fetch url"""
|
||||
debug("Checking robot.txt allowance for:\n"\
|
||||
" user agent: %r\n url: %r" % (useragent, url))
|
||||
if not isinstance(useragent, str):
|
||||
useragent = useragent.encode("ascii", "ignore")
|
||||
if not isinstance(url, str):
|
||||
url = url.encode("ascii", "ignore")
|
||||
if self.disallow_all:
|
||||
return False
|
||||
if self.allow_all:
|
||||
|
|
|
|||
Loading…
Reference in a new issue