mirror of
https://github.com/Hopiu/linkchecker.git
synced 2026-04-25 16:44:43 +00:00
don't discard robots.txt entries with only Allow: lines
git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@3471 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
parent
89538af3a1
commit
0c5d34e9f9
2 changed files with 13 additions and 0 deletions
|
|
@@ -300,6 +300,7 @@ class RobotFileParser (object):
|
|||
" this line", self.url, linenumber)
|
||||
else:
|
||||
entry.rulelines.append(RuleLine(line[1], 1))
|
||||
state = 2
|
||||
elif line[0] == "crawl-delay":
|
||||
if state == 0:
|
||||
assert None == log.debug(linkcheck.LOG_CHECK,
|
||||
|
|
|
|||
|
|
@@ -98,6 +98,18 @@ class TestRobotsTxt (unittest.TestCase):
|
|||
self.rp.parse(lines)
|
||||
self.assertEquals(str(self.rp), "")
|
||||
|
||||
def test_robotstxt7 (self):
    """Regression test: an entry containing only Allow: lines must not
    be discarded. User-agent 'Bla' may fetch '/' even though the
    catch-all entry disallows everything.
    """
    lines = [
        "User-agent: Bla",
        "Allow: /",
        "",
        "User-agent: *",
        "Disallow: /",
    ]
    self.rp.parse(lines)
    # Round-trip check: str() of the parser should reproduce the
    # input verbatim, proving the Allow-only entry was retained.
    # (assertEqual/assertTrue replace the deprecated assertEquals/
    # assert_ aliases, which were removed in Python 3.12.)
    self.assertEqual(str(self.rp), "\n".join(lines))
    # The specific Allow rule for 'Bla' overrides the global Disallow.
    self.assertTrue(self.rp.can_fetch("Bla", "/"))
|
||||
|
||||
def test_crawldelay (self):
|
||||
lines = [
|
||||
"User-agent: Blubb",
|
||||
|
|
|
|||
Loading…
Reference in a new issue