Don't discard robots.txt entries that contain only Allow: lines

git-svn-id: https://linkchecker.svn.sourceforge.net/svnroot/linkchecker/trunk/linkchecker@3471 e7d03fd6-7b0d-0410-9947-9c21f3af8025
This commit is contained in:
calvin 2006-09-21 09:14:28 +00:00
parent 89538af3a1
commit 0c5d34e9f9
2 changed files with 13 additions and 0 deletions

View file

@@ -300,6 +300,7 @@ class RobotFileParser (object):
" this line", self.url, linenumber)
else:
entry.rulelines.append(RuleLine(line[1], 1))
state = 2
elif line[0] == "crawl-delay":
if state == 0:
assert None == log.debug(linkcheck.LOG_CHECK,

View file

@@ -98,6 +98,18 @@ class TestRobotsTxt (unittest.TestCase):
self.rp.parse(lines)
self.assertEquals(str(self.rp), "")
def test_robotstxt7 (self):
    """Regression test: an entry consisting solely of Allow: lines
    must not be discarded by the parser.

    Parses a file whose first entry has only an Allow rule, then
    checks that the parser's round-tripped text matches the input and
    that the Allow-only entry actually grants access.
    """
    # Same robots.txt content as before, written as one literal and
    # split back into lines for parse().
    text = (
        "User-agent: Bla\n"
        "Allow: /\n"
        "\n"
        "User-agent: *\n"
        "Disallow: /"
    )
    robots_lines = text.splitlines()
    self.rp.parse(robots_lines)
    # str(self.rp) reproduces the parsed entries; it must equal the
    # original text, proving the Allow-only entry survived parsing.
    self.assertEquals(str(self.rp), text)
    # The Allow-only entry must permit agent "Bla" to fetch "/".
    self.assert_(self.rp.can_fetch("Bla", "/"))
def test_crawldelay (self):
lines = [
"User-agent: Blubb",