linkchecker/test/robots.txt
2001-02-20 09:32:36 +00:00

58 lines
1.3 KiB
Text

# /robots.txt for http://www.musi-cal.com/
# See http://info.webcrawler.com/mak/projects/robots/norobots.html
# Skip Montanaro (skip@mojam.com)
# - adapted from the robots.txt file at http://web.nexor.co.uk/
# disallow a bunch of ill-behaved user agents (doubt this will deter them...)
User-agent: ExtractorPro
Disallow: /
User-agent: EmailSiphon
Disallow: /
User-agent: EmailWolf
Disallow: /
User-agent: CherryPickerSE/1.0
Disallow: /
User-agent: CherryPickerElite/1.0
Disallow: /
User-agent: EmailCollector/1.0
Disallow: /
User-agent: EmailWolf 1.00
Disallow: /
User-agent: Crescent Internet ToolPak HTTP OLE Control v.1.0
Disallow: /
User-agent: EmailSiphon
Disallow: /
User-agent: Mozilla/2.0 (compatible; NEWT ActiveX; Win32)
Disallow: /
# default rules for every user agent not listed above
User-agent: *
Disallow: /ccrd # not useful to spiders
Disallow: /click # not useful to spiders
Disallow: /search # dynamic
Disallow: /hc # dynamic
Disallow: /subbatch # dynamic
Disallow: /vadd # dynamic
Disallow: /vsearch # dynamic
Disallow: /vedit # dynamic
Disallow: /vdelete # dynamic
Disallow: /cgi-bin # dynamic
Disallow: /images/ # useless images
Disallow: /icons/ # useless images
Disallow: /concerts/ # deprecated URL form
Disallow: /conferences # defunct
Disallow: /musician # defunct
Disallow: /~skip/volkswagen # defunct
Disallow: /%7Eskip/volkswagen # defunct