linkchecker/linkcheck/tests/test_robotstxt.py

148 lines
3.9 KiB
Python

# -*- coding: iso-8859-1 -*-
# Copyright (C) 2006 Bastian Kleineidam
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
"""
Test robots.txt parsing.
"""
import unittest
import linkcheck.robotparser2
class TestRobotsTxt (unittest.TestCase):
"""
Test string formatting routines.
"""
def setUp (self):
"""
Initialize self.rp as a robots.txt parser.
"""
self.rp = linkcheck.robotparser2.RobotFileParser()
def test_robotstxt (self):
lines = [
"User-agent: *",
]
self.rp.parse(lines)
self.assert_(self.rp.mtime() > 0)
self.assertEquals(str(self.rp), "\n".join(lines))
def test_robotstxt2 (self):
lines = [
"User-agent: *",
"Disallow: /search",
]
self.rp.parse(lines)
self.assertEquals(str(self.rp), "\n".join(lines))
def test_robotstxt3 (self):
lines = [
"Disallow: /search",
"",
"Allow: /search",
"",
"Crawl-Delay: 5",
"",
"Blabla",
"",
"Bla: bla",
]
self.rp.parse(lines)
self.assertEquals(str(self.rp), "")
def test_robotstxt4 (self):
lines = [
"User-agent: Bla",
"Disallow: /cgi-bin",
"User-agent: *",
"Disallow: /search",
]
self.rp.parse(lines)
lines.insert(2, "")
self.assertEquals(str(self.rp), "\n".join(lines))
def test_robotstxt5 (self):
lines = [
"#one line comment",
"User-agent: Bla",
"Disallow: /cgi-bin # comment",
"Allow: /search",
]
lines2 = [
"User-agent: Bla",
"Disallow: /cgi-bin",
"Allow: /search",
]
self.rp.parse(lines)
self.assertEquals(str(self.rp), "\n".join(lines2))
def test_robotstxt6 (self):
lines = [
"User-agent: Bla",
"",
]
self.rp.parse(lines)
self.assertEquals(str(self.rp), "")
def test_robotstxt7 (self):
lines = [
"User-agent: Bla",
"Allow: /",
"",
"User-agent: *",
"Disallow: /",
]
self.rp.parse(lines)
self.assertEquals(str(self.rp), "\n".join(lines))
self.assert_(self.rp.can_fetch("Bla", "/"))
def test_crawldelay (self):
lines = [
"User-agent: Blubb",
"Crawl-delay: 10",
"",
"User-agent: Hulla",
"Crawl-delay: 5",
"",
"User-agent: *",
"Crawl-delay: 1",
]
self.rp.parse(lines)
self.assertEquals(str(self.rp), "\n".join(lines))
self.assertEquals(self.rp.get_crawldelay("Blubb"), 10)
self.assertEquals(self.rp.get_crawldelay("Hulla"), 5)
self.assertEquals(self.rp.get_crawldelay("Bulla"), 1)
def test_crawldelay2 (self):
lines = [
"User-agent: Blubb",
"Crawl-delay: X",
]
self.rp.parse(lines)
del lines[1]
self.assertEquals(str(self.rp), "\n".join(lines))
def test_suite ():
"""
Build and return a TestSuite.
"""
return unittest.makeSuite(TestRobotsTxt)
if __name__ == '__main__':
unittest.main()